In [1]:
from __future__ import division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# hyper-parameters for the model
batch_size = 256
cell_size = 256
dropout_rate = 0.85 #pkeep
epochs = 1
alpha = 1e-5 #learning rate
num_classes = 2
sequence_length = 21
svm_c = 0.5

# Project Title

---
# 1. Dataset
Description of dataset and operations in the next step

In [25]:
# TODO: Replace <FILL IN> with appropriate code

# Loading the training features and labels
train_data = np.load('./gru-svm/dataset/train/train_data.npy')
train_labels = train_data[:,17]
train_features = np.delete(arr=train_data, obj=[17], axis=1)
train_features = train_features.astype(np.float32)

# Loading the test features and labels
test_data = np.load('./gru-svm/dataset/test/test_data.npy')
test_labels = test_data[:,17]
test_features = np.delete(arr=test_data, obj=[17], axis=1)
test_features = test_features.astype(np.float32)

train_size = train_features.shape[0]
test_size = test_features.shape[0]

# Modify data size to be a multiple of batch size
train_features = train_features[:train_size-(train_size % batch_size)]
train_labels = train_labels[:train_size-(train_size % batch_size)]

test_features = test_features[:test_size-(test_size % batch_size)]
test_labels = test_labels[:test_size-(test_size % batch_size)]

In [24]:
print(train_features[0])
print(test_features[0])

print(train_labels[0])
print(test_labels[0])

print(train_features.shape)
print(test_features.shape)

[ 6.  0.  2.  1.  5.  3.  0.  7.  2.  2.  2.  0.  0.  1.  0.  0.  0.  4.
  6.  1.  1.]
[ 7.  4.  7.  7.  0.  0.  1.  1.  0.  8.  0.  1.  1.  2.  0.  0.  0.  9.
  3.  6.  1.]
1
0
(1898240, 21)
(420608, 21)


---
# 2. GRU-SVM

### Initialize variables and build the model

In [4]:
## Initialize variables and build model

# Data variables
x = tf.placeholder(dtype=tf.uint8, shape=[None, sequence_length], name='x')

# [BATCH_SIZE, SEQUENCE_LENGTH, 10]
x_onehot = tf.one_hot(indices=x, depth=10, on_value=1.0, off_value=0.0, name='x_onehot')

# [BATCH_SIZE]
y = tf.placeholder(dtype=tf.uint8, shape=[None], name='y')

# [BATCH_SIZE, N_CLASSES]
y_onehot = tf.one_hot(indices=y, depth=num_classes, on_value=1.0, off_value=-1.0, name='y_onehot')

state = tf.placeholder(dtype=tf.float32, shape=[None, cell_size], name='initial_state')

p_keep = tf.placeholder(dtype=tf.float32, name='p_keep')
learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')

# GRU Layer
cell = tf.contrib.rnn.GRUCell(cell_size)
drop_cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=p_keep) 

outputs, states = tf.nn.dynamic_rnn(drop_cell, x_onehot, initial_state=state, dtype=tf.float32)

states = tf.identity(states, name='H')


with tf.name_scope('final_training_ops'):
    with tf.name_scope('weights'):
        weight = tf.get_variable('weights', initializer=tf.random_normal([cell_size, num_classes], 
                                                                         stddev=0.01))
#         self.variable_summaries(weight)
    with tf.name_scope('biases'):
        bias = tf.get_variable('biases', initializer=tf.constant(0.1, shape=[num_classes]))
#         self.variable_summaries(bias) 
    hf = tf.transpose(outputs, [1, 0, 2])
    last = tf.gather(hf, int(hf.get_shape()[0]) - 1)
    with tf.name_scope('Wx_plus_b'):
        output = tf.matmul(last, weight) + bias
        tf.summary.histogram('pre-activations', output)

# SVM Layer
with tf.name_scope('svm'):
    regularization_loss = 0.5 * tf.reduce_sum(tf.square(weight))
    hinge_loss = tf.reduce_sum(tf.square(tf.maximum(tf.zeros([batch_size, num_classes]), 
                                                    1 - y_onehot * output)))
    with tf.name_scope('loss'):
        loss = regularization_loss + svm_c * hinge_loss
tf.summary.scalar('loss', loss)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

with tf.name_scope('accuracy'):
    predicted_class = tf.sign(output)
    predicted_class = tf.identity(predicted_class, name='prediction')
    with tf.name_scope('correct_prediction'):
        correct = tf.equal(tf.argmax(predicted_class, 1), tf.argmax(y_onehot, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
tf.summary.scalar('accuracy', accuracy)

merged = tf.summary.merge_all()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [32]:
current_state = np.zeros([batch_size, cell_size])

# variables initializer
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())


with tf.Session() as sess:
    sess.run(init_op)
    
    for step in range(epochs * train_size // batch_size):
        offset = (step * batch_size) % train_size
        train_data=[train_features, train_labels]
        train_example_batch = train_data[0][offset:(offset + batch_size)]
        train_label_batch = train_data[1][offset:(offset + batch_size)]

        # dictionary for key-value pair input for training
        feed_dict = {x: train_example_batch, y: train_label_batch,
                                 state: current_state,
                                 learning_rate: alpha, p_keep: dropout_rate}

        train_summary, _, predictions, actual, next_state = sess.run([merged, optimizer,
                                                                                  predicted_class, y_onehot,
                                                                                  states],
                                                                                 feed_dict=feed_dict)
        if step % 100 == 0:
            # get train loss and accuracy
            train_loss, train_accuracy = sess.run([loss, accuracy], feed_dict=feed_dict)

            # display train loss and accuracy
            print('step [{}] train -- loss : {}, accuracy : {}'.format(step, train_loss, train_accuracy))

        current_state = next_state
    
    print('Starting with the testing phase')
    overall_accuracy = 0  #Initialize variable
    # Testing the model
    for step in range(epochs * test_size // batch_size):

        offset = (step * batch_size) % test_size
        test_data=[test_features, test_labels]
        test_example_batch = test_data[0][offset:(offset + batch_size)]
        test_label_batch = test_data[1][offset:(offset + batch_size)]

#         print(test_example_batch.shape)
#         print(test_label_batch.shape)

        # dictionary for key-value pair input for validation
        feed_dict = {x: test_example_batch, y: test_label_batch,
        state: np.zeros([batch_size, cell_size]), p_keep: 1.0}

        test_summary, predictions, actual, test_loss, test_accuracy = \
                        sess.run([merged, predicted_class, y_onehot, loss, accuracy],
                                 feed_dict=feed_dict)
        overall_accuracy = (overall_accuracy*(step) + test_accuracy)/(step+1)
        
        # Display test loss and accuracy every 100 steps
        if step % 100 == 0 and step > 0:

        # add the validation summary
        # validation_writer.add_summary(validation_summary, step)

        # display test loss and accuracy
            print('step [{}] test -- loss : {}, accuracy : {}, overall accuracy: {}'.
                  format(step, test_loss, test_accuracy,overall_accuracy))

step [0] train -- loss : 259.5896301269531, accuracy : 0.1796875
step [100] train -- loss : 254.0306396484375, accuracy : 0.2734375
step [200] train -- loss : 232.5886993408203, accuracy : 0.16796875
step [300] train -- loss : 230.5184326171875, accuracy : 0.80859375
step [400] train -- loss : 262.80792236328125, accuracy : 0.5234375
step [500] train -- loss : 256.8785705566406, accuracy : 0.5390625
step [600] train -- loss : 255.4293212890625, accuracy : 0.5390625
step [700] train -- loss : 242.49151611328125, accuracy : 0.58984375
step [800] train -- loss : 249.51719665527344, accuracy : 0.55078125
step [900] train -- loss : 246.8617706298828, accuracy : 0.52734375
step [1000] train -- loss : 292.51287841796875, accuracy : 0.33203125
step [1100] train -- loss : 255.4005126953125, accuracy : 0.6171875
step [1200] train -- loss : 237.19503784179688, accuracy : 0.82421875
step [1300] train -- loss : 240.17996215820312, accuracy : 0.6171875
step [1400] train -- loss : 191.07730102539062,