# HAR CNN training 

In [309]:
# Imports
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import precision_score, \
    recall_score, confusion_matrix, classification_report, \
    accuracy_score, f1_score
import matplotlib.pyplot as plt
%matplotlib inline

In [310]:
# Execute the helper script inside the notebook namespace.
# NOTE(review): later cells use read_data, standardize, one_hot, get_batches,
# WINDOWN_SIZE, NUM_CLASS and NUM_CHANNEL, none of which are defined in this
# notebook - presumably they all come from this script; confirm.
%run utils/utilities.py

In [311]:
# Vietnamese display names of the activity classes, one entry per class.
# NOTE(review): presumably position i names the class with index i used by
# the model outputs - confirm against the dataset's label definition.
class_label_vn = [
    u"Cổ tay",
    u"Cổ chân",
    u"Bả vai",
    u"Xoay người",
    u"Xoay đầu gối",
    u"Đi bộ",
    u"Chạy",
    u"Đá bóng",
    u"Đạp",
    u"Đánh răng",
    u"Rửa tay",
    u"Lau bàn",
    u"Quét nhà",
    u"Nạo",
    u"Thái",
    u"Trộn",
    u"Lên cầu thang",
    u"Xuống cầu thang",
]

## Prepare data

In [312]:
# Root directory of the normalized PTIT dataset. Both splits are read from this
# single variable so the train and test paths cannot drift apart.
rootDatasetDir = "./datasets/PTIT/normalized"

# read_data is not defined in this notebook (presumably provided by the
# `%run utils/utilities.py` cell). Each call returns the samples, their labels
# and the list of channel names for the requested split.
X_train, labels_train, list_ch_train = read_data(data_path=rootDatasetDir, split="train") # train
X_test, labels_test, list_ch_test = read_data(data_path=rootDatasetDir, split="test") # test

# Both splits must expose the same channels, otherwise the model input
# would mean different things at train and test time.
assert list_ch_train == list_ch_test, "Mismatch in channels!"

x_sensor_acc
x_watch_acc
x_watch_gyr
y_sensor_acc
y_watch_acc
y_watch_gyr
z_sensor_acc
z_watch_acc
z_watch_gyr
x_sensor_acc
x_watch_acc
x_watch_gyr
y_sensor_acc
y_watch_acc
y_watch_gyr
z_sensor_acc
z_watch_acc
z_watch_gyr


In [313]:
# Standardize the train and test arrays with the standardize() helper.
# NOTE(review): standardize is not defined in this notebook (presumably loaded
# by the `%run utils/utilities.py` cell), so the exact scaling it applies
# cannot be confirmed from here.
# NOTE(review): this cell rebinds X_train/X_test, so running it a second time
# feeds already-standardized data back into standardize().
X_train, X_test = standardize(X_train, X_test)

Train/Validation Split

In [314]:
# Hold out part of the training data as a validation set.
# The split is stratified on the labels and uses a fixed random_state so the
# same partition is produced on every run.
X_tr, X_vld, lab_tr, lab_vld = train_test_split(
    X_train,
    labels_train,
    stratify=labels_train,
    random_state=123)

One-hot encoding:

In [315]:
# One-hot encode the label vectors of the training, validation and test sets
# (in that order) with the one_hot() helper.
y_tr, y_vld, y_test = (one_hot(split_labels)
                       for split_labels in (lab_tr, lab_vld, labels_test))

In [316]:
# Imports
import tensorflow as tf

### Hyperparameters

In [317]:
# Training hyperparameters.
batch_size = 600               # Number of samples per mini-batch
seq_len = WINDOWN_SIZE         # Number of time steps per input window
learning_rate = 0.0001         # Fed to the optimizer through the learning_rate_ placeholder
epochs = 4000                  # Number of passes over the training batches

# Problem dimensions.
# NOTE(review): WINDOWN_SIZE, NUM_CLASS and NUM_CHANNEL are not defined in this
# notebook - presumably they come from utils/utilities.py; confirm.
n_classes = NUM_CLASS
n_channels = NUM_CHANNEL

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("n_channels %d" % n_channels)

n_channels 9


### Construct the graph
Placeholders

In [318]:
# Dedicated graph object; every later cell adds its ops to this graph.
graph = tf.Graph()

# Placeholders that are fed through feed_dict at run time.
with graph.as_default():
    # Input windows, shaped (batch, seq_len, n_channels).
    inputs_ = tf.placeholder(dtype=tf.float32,
                             shape=[None, seq_len, n_channels],
                             name='inputs')
    # One-hot targets, shaped (batch, n_classes).
    labels_ = tf.placeholder(dtype=tf.float32,
                             shape=[None, n_classes],
                             name='labels')
    # Dropout keep probability (0.5 while training, 1.0 when evaluating).
    keep_prob_ = tf.placeholder(dtype=tf.float32, name='keep')
    # Learning rate handed to the optimizer.
    learning_rate_ = tf.placeholder(dtype=tf.float32, name='learning_rate')

Build Convolutional Layers

In [319]:
def _conv_pool(layer_in, n_filters):
    """Apply one convolution stage and return the (conv, pooled) tensors.

    The stage is a ReLU conv1d (kernel size 2, stride 1, 'same' padding)
    followed by max pooling (pool size 2, stride 2, 'same' padding).
    """
    conv = tf.layers.conv1d(inputs=layer_in, filters=n_filters, kernel_size=2, strides=1,
                            padding='same', activation=tf.nn.relu)
    pooled = tf.layers.max_pooling1d(inputs=conv, pool_size=2, strides=2, padding='same')
    return conv, pooled


with graph.as_default():
    # Four stages; each doubles the number of filters while the stride-2
    # pooling halves the time axis.
    # The shapes in the comments assume seq_len == 128 and n_channels == 9.
    conv1, max_pool_1 = _conv_pool(inputs_, 18)       # (batch, 128, 9) --> (batch, 64, 18)
    conv2, max_pool_2 = _conv_pool(max_pool_1, 36)    # (batch, 64, 18) --> (batch, 32, 36)
    conv3, max_pool_3 = _conv_pool(max_pool_2, 72)    # (batch, 32, 36) --> (batch, 16, 72)
    conv4, max_pool_4 = _conv_pool(max_pool_3, 144)   # (batch, 16, 72) --> (batch, 8, 144)

Now, flatten and pass to the classifier

In [320]:
with graph.as_default():
    # Flatten the last pooling output to (batch, features). The feature count
    # is taken from the tensor's static shape instead of a hard-coded 8*144,
    # so the reshape stays correct if the window length or filter count changes.
    flat_dim = int(np.prod(max_pool_4.get_shape().as_list()[1:]))
    flat = tf.reshape(max_pool_4, (-1, flat_dim))

    # Dropout; the keep probability is fed at run time.
    flat = tf.nn.dropout(flat, keep_prob=keep_prob_)

    # Predictions: one logit per class
    logits = tf.layers.dense(flat, n_classes)

    # Cost function (mean softmax cross-entropy over the batch) and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_))
    optimizer = tf.train.AdamOptimizer(learning_rate_).minimize(cost)

    # Accuracy: fraction of samples whose arg-max logit matches the one-hot label
    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

### Train the network

In [None]:
# Create the checkpoint directory if it is missing. os.makedirs is used instead
# of a `!mkdir` shell escape so the cell does not depend on the host shell.
if not os.path.exists('checkpoints-cnn'):
    os.makedirs('checkpoints-cnn')

In [None]:
# Metric histories. They are kept at notebook level because the plotting cells
# read them after training:
#   train_*      - one entry per training iteration
#   validation_* - one entry per validation pass (every 10 iterations)
validation_acc = []
validation_loss = []

train_acc = []
train_loss = []

with graph.as_default():
    saver = tf.train.Saver()

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())

    # 1-based count of training steps across all epochs
    iteration = 1

    # Loop over epochs
    for e in range(epochs):

        # Loop over batches
        for x, y in get_batches(X_tr, y_tr, batch_size):

            # Training step: dropout active (keep probability 0.5)
            feed = {inputs_: x, labels_: y, keep_prob_: 0.5, learning_rate_: learning_rate}
            loss, _, acc = sess.run([cost, optimizer, accuracy], feed_dict=feed)
            train_acc.append(acc)
            train_loss.append(loss)

            # Report training metrics every 5 iterations. The message is built
            # as ONE string: print(a, b, c) would print a tuple on Python 2.
            if iteration % 5 == 0:
                print("Epoch: {}/{} Iteration: {:d} Train loss: {:.6f} Train acc: {:.6f}".format(
                    e, epochs, iteration, loss, acc))

            # Compute validation metrics every 10 iterations
            if iteration % 10 == 0:
                val_acc_ = []
                val_loss_ = []

                for x_v, y_v in get_batches(X_vld, y_vld, batch_size):
                    # Dropout disabled for evaluation; no learning rate is
                    # needed because the optimizer op is not run here.
                    feed = {inputs_: x_v, labels_: y_v, keep_prob_: 1.0}
                    loss_v, acc_v = sess.run([cost, accuracy], feed_dict=feed)
                    val_acc_.append(acc_v)
                    val_loss_.append(loss_v)

                # Average over the validation batches
                mean_val_loss = np.mean(val_loss_)
                mean_val_acc = np.mean(val_acc_)

                print("Epoch: {}/{} Iteration: {:d} Validation loss: {:.6f} Validation acc: {:.6f}".format(
                    e, epochs, iteration, mean_val_loss, mean_val_acc))

                # Store
                validation_acc.append(mean_val_acc)
                validation_loss.append(mean_val_loss)

            # Iterate
            iteration += 1

    # Persist the final weights; the evaluation cell restores this checkpoint.
    saver.save(sess, "checkpoints-cnn/har.ckpt")

('Epoch: 0/4000', 'Iteration: 5', 'Train loss: 2.892627', 'Train acc: 0.073333')
('Epoch: 1/4000', 'Iteration: 10', 'Train loss: 2.862967', 'Train acc: 0.060000')
('Epoch: 1/4000', 'Iteration: 10', 'Validation loss: 2.794882', 'Validation acc: 0.090000')
('Epoch: 2/4000', 'Iteration: 15', 'Train loss: 2.823937', 'Train acc: 0.071667')
('Epoch: 3/4000', 'Iteration: 20', 'Train loss: 2.813320', 'Train acc: 0.083333')
('Epoch: 3/4000', 'Iteration: 20', 'Validation loss: 2.747391', 'Validation acc: 0.103333')
('Epoch: 4/4000', 'Iteration: 25', 'Train loss: 2.791797', 'Train acc: 0.090000')
('Epoch: 5/4000', 'Iteration: 30', 'Train loss: 2.761244', 'Train acc: 0.086667')
('Epoch: 5/4000', 'Iteration: 30', 'Validation loss: 2.714320', 'Validation acc: 0.111667')
('Epoch: 6/4000', 'Iteration: 35', 'Train loss: 2.776452', 'Train acc: 0.095000')
('Epoch: 7/4000', 'Iteration: 40', 'Train loss: 2.760308', 'Train acc: 0.076667')
('Epoch: 7/4000', 'Iteration: 40', 'Validation loss: 2.688966', 'Vali

('Epoch: 64/4000', 'Iteration: 325', 'Train loss: 1.398740', 'Train acc: 0.548333')
('Epoch: 65/4000', 'Iteration: 330', 'Train loss: 1.417905', 'Train acc: 0.540000')
('Epoch: 65/4000', 'Iteration: 330', 'Validation loss: 1.296657', 'Validation acc: 0.630000')
('Epoch: 66/4000', 'Iteration: 335', 'Train loss: 1.382661', 'Train acc: 0.546667')
('Epoch: 67/4000', 'Iteration: 340', 'Train loss: 1.350232', 'Train acc: 0.556667')
('Epoch: 67/4000', 'Iteration: 340', 'Validation loss: 1.261905', 'Validation acc: 0.646667')
('Epoch: 68/4000', 'Iteration: 345', 'Train loss: 1.312114', 'Train acc: 0.588333')
('Epoch: 69/4000', 'Iteration: 350', 'Train loss: 1.326054', 'Train acc: 0.578333')
('Epoch: 69/4000', 'Iteration: 350', 'Validation loss: 1.230545', 'Validation acc: 0.650000')
('Epoch: 70/4000', 'Iteration: 355', 'Train loss: 1.285541', 'Train acc: 0.576667')
('Epoch: 71/4000', 'Iteration: 360', 'Train loss: 1.268187', 'Train acc: 0.583333')
('Epoch: 71/4000', 'Iteration: 360', 'Validati

('Epoch: 127/4000', 'Iteration: 640', 'Train loss: 0.764879', 'Train acc: 0.778333')
('Epoch: 127/4000', 'Iteration: 640', 'Validation loss: 0.782954', 'Validation acc: 0.750000')
('Epoch: 128/4000', 'Iteration: 645', 'Train loss: 0.777514', 'Train acc: 0.743333')
('Epoch: 129/4000', 'Iteration: 650', 'Train loss: 0.790075', 'Train acc: 0.740000')
('Epoch: 129/4000', 'Iteration: 650', 'Validation loss: 0.774867', 'Validation acc: 0.748333')
('Epoch: 130/4000', 'Iteration: 655', 'Train loss: 0.776719', 'Train acc: 0.750000')
('Epoch: 131/4000', 'Iteration: 660', 'Train loss: 0.792594', 'Train acc: 0.720000')
('Epoch: 131/4000', 'Iteration: 660', 'Validation loss: 0.768874', 'Validation acc: 0.753333')
('Epoch: 132/4000', 'Iteration: 665', 'Train loss: 0.783337', 'Train acc: 0.743333')
('Epoch: 133/4000', 'Iteration: 670', 'Train loss: 0.774053', 'Train acc: 0.755000')
('Epoch: 133/4000', 'Iteration: 670', 'Validation loss: 0.765770', 'Validation acc: 0.756667')
('Epoch: 134/4000', 'Iter

('Epoch: 189/4000', 'Iteration: 950', 'Train loss: 0.558233', 'Train acc: 0.813333')
('Epoch: 189/4000', 'Iteration: 950', 'Validation loss: 0.627231', 'Validation acc: 0.813333')
('Epoch: 190/4000', 'Iteration: 955', 'Train loss: 0.547008', 'Train acc: 0.815000')
('Epoch: 191/4000', 'Iteration: 960', 'Train loss: 0.546017', 'Train acc: 0.826667')
('Epoch: 191/4000', 'Iteration: 960', 'Validation loss: 0.625464', 'Validation acc: 0.811667')
('Epoch: 192/4000', 'Iteration: 965', 'Train loss: 0.535814', 'Train acc: 0.828333')
('Epoch: 193/4000', 'Iteration: 970', 'Train loss: 0.532213', 'Train acc: 0.835000')
('Epoch: 193/4000', 'Iteration: 970', 'Validation loss: 0.623497', 'Validation acc: 0.813333')
('Epoch: 194/4000', 'Iteration: 975', 'Train loss: 0.517902', 'Train acc: 0.838333')
('Epoch: 195/4000', 'Iteration: 980', 'Train loss: 0.544703', 'Train acc: 0.805000')
('Epoch: 195/4000', 'Iteration: 980', 'Validation loss: 0.617541', 'Validation acc: 0.818333')
('Epoch: 196/4000', 'Iter

('Epoch: 250/4000', 'Iteration: 1255', 'Train loss: 0.423598', 'Train acc: 0.861667')
('Epoch: 251/4000', 'Iteration: 1260', 'Train loss: 0.414697', 'Train acc: 0.871667')
('Epoch: 251/4000', 'Iteration: 1260', 'Validation loss: 0.540705', 'Validation acc: 0.831667')
('Epoch: 252/4000', 'Iteration: 1265', 'Train loss: 0.427935', 'Train acc: 0.868333')
('Epoch: 253/4000', 'Iteration: 1270', 'Train loss: 0.416902', 'Train acc: 0.878333')
('Epoch: 253/4000', 'Iteration: 1270', 'Validation loss: 0.544589', 'Validation acc: 0.828333')
('Epoch: 254/4000', 'Iteration: 1275', 'Train loss: 0.425454', 'Train acc: 0.865000')
('Epoch: 255/4000', 'Iteration: 1280', 'Train loss: 0.431345', 'Train acc: 0.860000')
('Epoch: 255/4000', 'Iteration: 1280', 'Validation loss: 0.537175', 'Validation acc: 0.833333')
('Epoch: 256/4000', 'Iteration: 1285', 'Train loss: 0.430527', 'Train acc: 0.861667')
('Epoch: 257/4000', 'Iteration: 1290', 'Train loss: 0.386181', 'Train acc: 0.876667')
('Epoch: 257/4000', 'Ite

('Epoch: 312/4000', 'Iteration: 1565', 'Train loss: 0.339897', 'Train acc: 0.893333')
('Epoch: 313/4000', 'Iteration: 1570', 'Train loss: 0.320734', 'Train acc: 0.895000')
('Epoch: 313/4000', 'Iteration: 1570', 'Validation loss: 0.494049', 'Validation acc: 0.846667')
('Epoch: 314/4000', 'Iteration: 1575', 'Train loss: 0.302111', 'Train acc: 0.896667')
('Epoch: 315/4000', 'Iteration: 1580', 'Train loss: 0.311244', 'Train acc: 0.900000')
('Epoch: 315/4000', 'Iteration: 1580', 'Validation loss: 0.489286', 'Validation acc: 0.850000')
('Epoch: 316/4000', 'Iteration: 1585', 'Train loss: 0.311507', 'Train acc: 0.903333')
('Epoch: 317/4000', 'Iteration: 1590', 'Train loss: 0.297187', 'Train acc: 0.908333')
('Epoch: 317/4000', 'Iteration: 1590', 'Validation loss: 0.487044', 'Validation acc: 0.853333')
('Epoch: 318/4000', 'Iteration: 1595', 'Train loss: 0.344687', 'Train acc: 0.906667')
('Epoch: 319/4000', 'Iteration: 1600', 'Train loss: 0.309433', 'Train acc: 0.901667')
('Epoch: 319/4000', 'Ite

('Epoch: 374/4000', 'Iteration: 1875', 'Train loss: 0.267449', 'Train acc: 0.908333')
('Epoch: 375/4000', 'Iteration: 1880', 'Train loss: 0.228108', 'Train acc: 0.926667')
('Epoch: 375/4000', 'Iteration: 1880', 'Validation loss: 0.458492', 'Validation acc: 0.865000')
('Epoch: 376/4000', 'Iteration: 1885', 'Train loss: 0.239955', 'Train acc: 0.928333')
('Epoch: 377/4000', 'Iteration: 1890', 'Train loss: 0.247030', 'Train acc: 0.923333')
('Epoch: 377/4000', 'Iteration: 1890', 'Validation loss: 0.457977', 'Validation acc: 0.861667')
('Epoch: 378/4000', 'Iteration: 1895', 'Train loss: 0.222181', 'Train acc: 0.931667')
('Epoch: 379/4000', 'Iteration: 1900', 'Train loss: 0.252415', 'Train acc: 0.928333')
('Epoch: 379/4000', 'Iteration: 1900', 'Validation loss: 0.460856', 'Validation acc: 0.863333')
('Epoch: 380/4000', 'Iteration: 1905', 'Train loss: 0.244864', 'Train acc: 0.926667')
('Epoch: 381/4000', 'Iteration: 1910', 'Train loss: 0.239023', 'Train acc: 0.926667')
('Epoch: 381/4000', 'Ite

('Epoch: 436/4000', 'Iteration: 2185', 'Train loss: 0.191817', 'Train acc: 0.948333')
('Epoch: 437/4000', 'Iteration: 2190', 'Train loss: 0.193903', 'Train acc: 0.951667')
('Epoch: 437/4000', 'Iteration: 2190', 'Validation loss: 0.432195', 'Validation acc: 0.875000')
('Epoch: 438/4000', 'Iteration: 2195', 'Train loss: 0.179818', 'Train acc: 0.953333')
('Epoch: 439/4000', 'Iteration: 2200', 'Train loss: 0.190978', 'Train acc: 0.958333')
('Epoch: 439/4000', 'Iteration: 2200', 'Validation loss: 0.439316', 'Validation acc: 0.876667')
('Epoch: 440/4000', 'Iteration: 2205', 'Train loss: 0.200056', 'Train acc: 0.921667')
('Epoch: 441/4000', 'Iteration: 2210', 'Train loss: 0.190041', 'Train acc: 0.940000')
('Epoch: 441/4000', 'Iteration: 2210', 'Validation loss: 0.438608', 'Validation acc: 0.878333')
('Epoch: 442/4000', 'Iteration: 2215', 'Train loss: 0.183457', 'Train acc: 0.955000')
('Epoch: 443/4000', 'Iteration: 2220', 'Train loss: 0.200540', 'Train acc: 0.933333')
('Epoch: 443/4000', 'Ite

('Epoch: 498/4000', 'Iteration: 2495', 'Train loss: 0.164251', 'Train acc: 0.945000')
('Epoch: 499/4000', 'Iteration: 2500', 'Train loss: 0.165279', 'Train acc: 0.948333')
('Epoch: 499/4000', 'Iteration: 2500', 'Validation loss: 0.431651', 'Validation acc: 0.881667')
('Epoch: 500/4000', 'Iteration: 2505', 'Train loss: 0.178617', 'Train acc: 0.946667')
('Epoch: 501/4000', 'Iteration: 2510', 'Train loss: 0.171989', 'Train acc: 0.946667')
('Epoch: 501/4000', 'Iteration: 2510', 'Validation loss: 0.431908', 'Validation acc: 0.880000')
('Epoch: 502/4000', 'Iteration: 2515', 'Train loss: 0.158336', 'Train acc: 0.960000')
('Epoch: 503/4000', 'Iteration: 2520', 'Train loss: 0.142838', 'Train acc: 0.963333')
('Epoch: 503/4000', 'Iteration: 2520', 'Validation loss: 0.428390', 'Validation acc: 0.881667')
('Epoch: 504/4000', 'Iteration: 2525', 'Train loss: 0.156323', 'Train acc: 0.958333')
('Epoch: 505/4000', 'Iteration: 2530', 'Train loss: 0.168038', 'Train acc: 0.950000')
('Epoch: 505/4000', 'Ite

In [None]:
# Plot training and validation loss
# The x axis uses the same 1-based iteration numbers as the training loop, so
# the validation points (recorded at iterations 10, 20, ...) land on the
# iteration they were measured at and always match validation_loss in length.
t = np.arange(1, len(train_loss) + 1)

plt.figure(figsize = (6,6))
plt.plot(t, np.array(train_loss), 'r-', t[t % 10 == 0], np.array(validation_loss), 'b*')
plt.xlabel("iteration")
plt.ylabel("Loss")
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
# Plot training and validation accuracy
# The iteration axis is built here rather than reused from another cell, so
# this cell can run on its own. It is 1-based like the training loop counter,
# which puts the validation points at iterations 10, 20, ...
t = np.arange(1, len(train_acc) + 1)

plt.figure(figsize = (6,6))

plt.plot(t, np.array(train_acc), 'r-', t[t % 10 == 0], validation_acc, 'b*')
plt.xlabel("iteration")
plt.ylabel("Accuracy")
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

## Evaluate on test set

In [None]:
# Per-batch accuracies on the test set
test_acc = []

with graph.as_default():
    # Predicted class index = position of the largest logit
    prediction = tf.argmax(logits, 1)

with tf.Session(graph=graph) as sess:
    # Restore the weights saved by the training cell
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints-cnn'))

    y_pred = []
    y_true = []
    for x_t, y_t in get_batches(X_test, y_test, batch_size):
        # Dropout disabled for evaluation
        feed = {inputs_: x_t,
                labels_: y_t,
                keep_prob_: 1.0}
        batch_acc, batch_y_pred = sess.run([accuracy, prediction], feed_dict=feed)
        y_pred.extend(batch_y_pred)
        # Recover the class index from each one-hot row
        y_true.extend(np.argmax(y_t, axis=1))

        test_acc.append(batch_acc)
    print("Test accuracy: {:.6f}".format(np.mean(test_acc)))

    # The same explicit label list is passed to every per-class metric so the
    # rows/columns always line up with the class indices, even if a class is
    # missing from the evaluated batches.
    sk_class_labels = list(range(NUM_CLASS))

    # Every print below takes a single argument so the output is identical on
    # Python 2 and Python 3.
    print(precision_recall_fscore_support(y_true, y_pred, average=None, labels=sk_class_labels))
    print('Accuracy: {}'.format(accuracy_score(y_true, y_pred)))
    print('F1 score: {}'.format(f1_score(y_true, y_pred, average='micro')))
    print('Recall: {}'.format(recall_score(y_true, y_pred, average='micro')))
    print('Precision: {}'.format(precision_score(y_true, y_pred, average='micro')))
    print('\n classification report:\n{}'.format(
        classification_report(y_true, y_pred, labels=sk_class_labels)))
    print('\n confusion matrix:\n{}'.format(
        confusion_matrix(y_true, y_pred, labels=sk_class_labels)))