In [1]:
import os
import re
from tqdm import tqdm
import pandas as pd
from collections import Counter
import pickle
import numpy as np
import tensorflow as tf

In [2]:
# Integer-encoded input sequences, one CSV data row per sequence. `.values` keeps
# only the underlying 2-D NumPy array; each row is later used as a vector of
# indices into the `onehot_x` lookup table.
zeroday_lbl_x = pd.read_csv("zeroday_lbl_x.csv").values

In [3]:
# Integer-encoded label sequences, row-aligned with `zeroday_lbl_x` (presumably one
# label per token position -- confirm against the preprocessing step). Each row is
# later used as a vector of indices into the `onehot_y` lookup table.
zeroday_lbl_y = pd.read_csv("zeroday_lbl_y.csv").values

In [4]:
# Per-sequence lengths: handed to Batcher as `x_batch_size` and eventually fed to
# the `seqlen` placeholder of the model.
# NOTE: pickle.load can execute arbitrary code stored in the file -- only load
# pickles that were produced by a trusted step of this pipeline.
with open('zeroday_lbl_wordsize.pkl', 'rb') as f:  # Python 3: pickle files must be opened in binary mode ('rb')
    word_size = pickle.load(f)

In [5]:
# Vocabulary object saved by the preprocessing step. In the cells shown here only
# its length is used (it sets the width of the one-hot input vectors).
# NOTE: pickle.load can execute arbitrary code stored in the file -- only load
# pickles that were produced by a trusted step of this pipeline.
with open('zeroday_vocab.pkl', 'rb') as f:  # Python 3: pickle files must be opened in binary mode ('rb')
    vocab = pickle.load(f)

In [6]:
# Vocabulary size; this becomes the width of every one-hot input vector.
len(vocab)

2795

In [7]:
# Label lookup table: rows 0-2 are the one-hot vectors of the three classes and
# row 3 is all zeros. The all-zero row is what the loss mask later detects
# (presumably label 3 marks padding positions -- confirm against preprocessing).
onehot_y = np.concatenate([np.eye(3), np.zeros((1, 3))], axis=0)
onehot_y

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  0.]])

In [8]:
# Expand each row of label indices into a (positions, 3) one-hot matrix by fancy
# indexing into the lookup table; the result is a plain list with one matrix per
# sequence.
zeroday_onehot_y = [onehot_y[label_row] for label_row in zeroday_lbl_y]

In [9]:
# Inspect the first encoded label sequence: one row per position, one column per class.
zeroday_onehot_y[0]

array([[ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.

In [10]:
# Input lookup table: an identity matrix with one row per vocabulary entry,
# followed by a single all-zero row, so index len(vocab) encodes as a zero vector
# (presumably the padding index -- confirm against preprocessing).
onehot_x = np.concatenate([np.eye(len(vocab)), np.zeros((1, len(vocab)))], axis=0)
onehot_x

array([[ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  1.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  1., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [11]:
# Expand each row of token indices into a (positions, len(vocab)) one-hot matrix.
# NOTE: this is dense float64 -- with the sizes shown in this notebook
# (1527 sequences x 114 positions x 2795 vocabulary entries) it needs roughly
# 3.9 GB of RAM.
zeroday_onehot_x = [onehot_x[token_row] for token_row in zeroday_lbl_x]

In [12]:
# Inspect the first encoded input sequence. NumPy elides the middle of large
# arrays, so only the corner entries are printed.
zeroday_onehot_x[0]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [13]:
class Batcher:
    """Sequential mini-batch feeder over a fixed train/test split.

    The leading ``train_ratio`` share of the samples (in the order given) forms
    the training split and the rest forms the test split; nothing is shuffled.
    ``x``, ``y`` and ``x_batch_size`` must support ``len``/slicing and are
    expected to be aligned index-for-index (this is not verified).
    """

    def __init__(self, x, y, x_batch_size, train_ratio=0.8):
        """Split the three parallel collections into train and test parts.

        Args:
            x: per-sample inputs.
            y: per-sample targets.
            x_batch_size: per-sample sequence lengths.
            train_ratio: fraction of samples assigned to the training split
                (defaults to the original hard-coded 0.8).
        """
        # Index of the first test sample == number of training samples.
        self.split_size = int(len(x) * train_ratio)
        self.train_x = x[:self.split_size]
        self.train_y = y[:self.split_size]
        self.train_size = x_batch_size[:self.split_size]
        self.test_x = x[self.split_size:]
        self.test_y = y[self.split_size:]
        self.test_size = x_batch_size[self.split_size:]
        # Cursor into the training split; next_batch() advances it.
        self.start = 0

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` training samples and advance the cursor.

        When the requested slice would run past the end of the training split
        the cursor rewinds to 0 and the batch is taken from the beginning, so a
        trailing remainder shorter than ``batch_size`` is skipped. If
        ``batch_size`` exceeds the whole training split, the returned slices are
        simply clipped to the split.

        Returns:
            A tuple ``(inputs, targets, lengths)`` of equally long slices.
        """
        s_index = self.start
        e_index = s_index + batch_size
        # A slice ending exactly at split_size is still a complete batch, so only
        # rewind on a real overrun (the former `>=` silently dropped that batch).
        if e_index > self.split_size:
            s_index = 0
            e_index = batch_size
        self.start = e_index
        return (self.train_x[s_index:e_index],
                self.train_y[s_index:e_index],
                self.train_size[s_index:e_index])

In [14]:
# Wrap the encoded data in a Batcher for a quick look at what a batch contains.
# NOTE: the graph-definition cell further down creates a fresh Batcher under the
# same name, which resets the batch cursor before training starts.
zeroday_batch = Batcher(zeroday_onehot_x, zeroday_onehot_y, word_size)

In [15]:
# Pull one batch of 10 training samples for inspection (this advances the
# batcher's internal cursor by 10).
batch_data, batch_labels, batch_seqlen = zeroday_batch.next_batch(10)

In [16]:
# First two one-hot input matrices of the sample batch.
batch_data[:2]

[array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]]),
 array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]])]

In [17]:
# First two one-hot label matrices of the sample batch.
batch_labels[:2]

[array([[ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.],
        [ 0.,  0.,  1.],
        [ 0.,  0.,  1.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 1.,  0.,  0.],


In [18]:
# One-hot label matrix of the first sample in the batch (one row per position).
batch_labels[0]

array([[ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.

In [19]:
# Class index at the last position of the first sample. NOTE: np.argmax of an
# all-zero row is also 0, so a padded position (presumably the case here, since
# this sample's recorded length is shorter than the padded width) cannot be told
# apart from class 0 by argmax alone -- the loss therefore needs an explicit mask.
np.argmax(batch_labels[0][-1])

0

In [20]:
# Recorded sequence lengths of the first two samples in the batch.
batch_seqlen[:2]

[83, 89]

In [21]:
# Number of recorded sequence lengths; should equal the number of sequences.
len(word_size)

1527

In [22]:
# Put the lengths into a single-column DataFrame (the column is labelled 0) to
# get summary statistics of the sequence-length distribution.
word_size_pd = pd.DataFrame(word_size)
word_size_pd.describe()

Unnamed: 0,0
count,1527.0
mean,80.619515
std,9.025809
min,49.0
25%,74.0
50%,82.0
75%,87.0
max,114.0


In [23]:
# Rows (sequence indices) whose recorded length exceeds 100.
word_size_pd.loc[word_size_pd[0] > 100]

Unnamed: 0,0
166,105
167,105
181,114
218,106
291,106
317,109
321,101
364,101
394,104
438,102


In [24]:
# (number of sequences, padded length of each label row)
len(zeroday_lbl_y), len(zeroday_lbl_y[0])

(1527, 114)

In [25]:
# Padded sequence length = width of a label row. NOTE: the graph-definition cell
# below assigns this same value again, so this cell is redundant.
seq_max_len = len(zeroday_lbl_y[0])

In [26]:
# Build the bidirectional-GRU sequence tagger: placeholders, encoder, per-step
# softmax layer, masked loss, optimizer and masked accuracy.
# Start from an empty default graph so re-running this cell does not stack a
# second copy of every op and variable on top of the previous one.
tf.reset_default_graph()

# Parameters
learning_rate = 0.01  # step size passed to the Adam optimizer below
training_steps = 500  # number of optimizer updates performed by the training cell
batch_size = 100  # sequences per training batch
display_step = 50  # the training cell reports loss/accuracy every this many steps

# Network Parameters
seq_max_len = len(zeroday_lbl_y[0]) # padded length shared by all sequences (width of a label row)
forward_units = 128 # hidden size of the forward GRU
backward_units = 128 # hidden size of the backward GRU
n_classes = 3 # number of label classes (last dimension of `y` and of the logits)

# Fresh Batcher so that training starts from the first training sample
zeroday_batch = Batcher(zeroday_onehot_x, zeroday_onehot_y, word_size)

# tf Graph input: one-hot tokens [batch, time, vocab] and one-hot labels [batch, time, class]
x = tf.placeholder("float", [None, seq_max_len, len(vocab)])
y = tf.placeholder("float", [None, seq_max_len, n_classes])
# A placeholder for indicating each sequence length
seqlen = tf.placeholder(tf.int32, [None,])


with tf.variable_scope("bi-GRU") as scope:
    # One GRU cell per direction
    encoder_fw = tf.contrib.rnn.GRUCell(forward_units)
    encoder_bw = tf.contrib.rnn.GRUCell(backward_units)

    # `outputs` is a (forward, backward) pair of [batch, time, units] tensors; the
    # final states are discarded. Per the TensorFlow docs, passing sequence_length
    # makes the RNN emit zeros past each sequence's length.
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_fw, 
                                            cell_bw=encoder_bw, 
                                            inputs=x,
                                            sequence_length=seqlen,
                                            dtype=tf.float32)

    print(outputs)
    # Join both directions along the feature axis -> [batch, time, fw + bw units]
    output = tf.concat(outputs, 2)
    print(output)
#     batch_size = output.get_shape().as_list()[0]
    # Merge batch and time axes so one dense layer can score every time step at once
    reshape = tf.reshape(output, [-1, forward_units + backward_units])
    print(reshape)

with tf.variable_scope("output"):
    # Dense projection from the concatenated GRU features to class scores
    softmax_w = tf.get_variable("softmax_w",
                                shape=[forward_units + backward_units, n_classes],
                                initializer=tf.truncated_normal_initializer(stddev=0.05),
                                dtype=tf.float32)
    softmax_b = tf.get_variable("softmax_b",
                                shape=[n_classes],
                                initializer=tf.constant_initializer(value=0.),
                                dtype=tf.float32)
    xw_plus_b = tf.nn.xw_plus_b(reshape, softmax_w, softmax_b)
    print(xw_plus_b)
    # Restore the [batch, time, class] layout
    logits = tf.reshape(xw_plus_b, [-1, seq_max_len, n_classes])
    print(logits)

with tf.name_scope("loss"):
    # Class index per step. An all-zero label row also yields index 0, so the
    # unmasked loss below contains meaningless terms for such positions.
    y_arg = tf.argmax(y, axis=2)
    print(y_arg)
    # "fake" because it is computed for every position, including unlabelled ones
    fake_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_arg)
    print(fake_loss)
    # Row sum is 1 for a one-hot label row and 0 for an all-zero row ...
    y_sum = tf.reduce_sum(y, axis=2)
    print(y_sum)
    # ... so its sign is a 0/1 mask selecting the labelled positions
    mask = tf.cast(tf.sign(y_sum), dtype=tf.float32)
    print(mask)
    loss_per_example_per_step = tf.multiply(fake_loss, mask)
    print(loss_per_example_per_step)
    # Sum over the time axis (`reduction_indices` is the older name of `axis`)
    loss_per_example_sum = tf.reduce_sum(loss_per_example_per_step, reduction_indices=[1])
    print(loss_per_example_sum)
    # Average over the sequence length. NOTE(review): the mask comes from `y` but the
    # divisor from `seqlen`; this is a true per-step mean only if seqlen equals the
    # number of labelled positions, and it divides by zero for a length of 0.
    loss_per_example_average = tf.div(x=loss_per_example_sum,
                                      y=tf.cast(seqlen, tf.float32))
    print(loss_per_example_average)
    # Mean over the batch
    loss = tf.reduce_mean(loss_per_example_average, name="loss")
    print(loss)

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, name="train_op")

with tf.name_scope("valid"):
    # Predicted class per step
    predict = tf.argmax(logits, axis=2)
    print(predict)
    # 1.0 where prediction matches the label index; masked the same way as the loss
    # so that unlabelled positions do not count as hits
    fake_accuracy = tf.cast(tf.equal(predict, y_arg), dtype=tf.float32)
    accuracy_matrix = tf.multiply(fake_accuracy, mask)
    # Per-sequence fraction of correct steps, then the mean over the batch
    accuracy_per_example = tf.div(x=tf.reduce_sum(accuracy_matrix, 1),
                                  y=tf.cast(seqlen, tf.float32))
    accuracy = tf.reduce_mean(accuracy_per_example, name="valid_accuracy")

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

(<tf.Tensor 'bi-GRU/bidirectional_rnn/fw/fw/transpose:0' shape=(?, 114, 128) dtype=float32>, <tf.Tensor 'bi-GRU/ReverseSequence:0' shape=(?, 114, 128) dtype=float32>)
Tensor("bi-GRU/concat:0", shape=(?, 114, 256), dtype=float32)
Tensor("bi-GRU/Reshape:0", shape=(?, 256), dtype=float32)
Tensor("output/xw_plus_b:0", shape=(?, 3), dtype=float32)
Tensor("output/Reshape:0", shape=(?, 114, 3), dtype=float32)
Tensor("loss/ArgMax:0", shape=(?, 114), dtype=int64)
Tensor("loss/SparseSoftmaxCrossEntropyWithLogits/Reshape_2:0", shape=(?, 114), dtype=float32)
Tensor("loss/Sum:0", shape=(?, 114), dtype=float32)
Tensor("loss/Sign:0", shape=(?, 114), dtype=float32)
Tensor("loss/Mul:0", shape=(?, 114), dtype=float32)
Tensor("loss/Sum_1:0", shape=(?,), dtype=float32)
Tensor("loss/div:0", shape=(?,), dtype=float32)
Tensor("loss/loss:0", shape=(), dtype=float32)
Tensor("valid/ArgMax:0", shape=(?, 114), dtype=int64)


In [27]:
# Train the model, logging summaries for TensorBoard and checkpointing as it goes.
LOG_DIR = "./biGRU/"
# Keep at most the 15 most recent checkpoints on disk.
saver = tf.train.Saver(tf.global_variables(), max_to_keep=15)
# Path of the newest checkpoint under LOG_DIR (None when there is none); only the
# optional restore inside the session needs it.
module_file = tf.train.latest_checkpoint(LOG_DIR)

# Scalar summaries for TensorBoard; `merged` evaluates all of them in one run call.
tf.summary.scalar("loss", loss)
tf.summary.scalar("acc", accuracy)
merged = tf.summary.merge_all()

# The held-out split never changes, so its feed dict is built once and reused for
# every periodic evaluation and for the final one.
test_data = zeroday_batch.test_x
test_label = zeroday_batch.test_y
test_seqlen = zeroday_batch.test_size
test_feed = {x: test_data, y: test_label, seqlen: test_seqlen}

# Start training
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(LOG_DIR + "train/", sess.graph)
    try:
        # Run the initializer
        sess.run(init)
        # To resume from the newest checkpoint instead of the fresh initialization:
        # saver.restore(sess, module_file)

        for step in range(1, training_steps + 1):
            # Get batch data
            batch_x, batch_y, batch_seqlen = zeroday_batch.next_batch(batch_size)
            batch_feed = {x: batch_x, y: batch_y, seqlen: batch_seqlen}
            # Run optimization op (backprop) and record this step's summaries
            _, summary_ = sess.run([train_op, merged], feed_dict=batch_feed)
            train_writer.add_summary(summary_, step)

            if step % display_step == 0 or step == 1:
                # Re-evaluate the same batch after the update.
                cost, acc = sess.run([loss, accuracy], feed_dict=batch_feed)
                # The number printed after "Step" is step * batch_size, not the step index.
                print("Step " + str(step*batch_size) +
                      ", Minibatch Loss= " + "{:.6f}".format(cost) +
                      ", Training Accuracy= " + "{:.5f}".format(acc))

                # Accuracy on the held-out split
                test_acc = sess.run(accuracy, feed_dict=test_feed)
                print("Testing Accuracy:", test_acc)
            if step % 5 == 0:
                saver.save(sess, LOG_DIR+"zeroday.ckpt", global_step=step)

        print("Optimization Finished!")

        # Final accuracy on the held-out split
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("Testing Accuracy:", test_acc)
    finally:
        # Flush buffered events and release the event file even if training raised;
        # previously the writer was never closed.
        train_writer.close()

Step 100, Minibatch Loss= 0.910922, Training Accuracy= 0.96366
Testing Accuracy: 0.95113
Step 5000, Minibatch Loss= 0.018893, Training Accuracy= 0.99567
Testing Accuracy: 0.99287
Step 10000, Minibatch Loss= 0.004648, Training Accuracy= 0.99879
Testing Accuracy: 0.995548
Step 15000, Minibatch Loss= 0.000220, Training Accuracy= 1.00000
Testing Accuracy: 0.997138
Step 20000, Minibatch Loss= 0.000085, Training Accuracy= 1.00000
Testing Accuracy: 0.998171
Step 25000, Minibatch Loss= 0.000049, Training Accuracy= 1.00000
Testing Accuracy: 0.998658
Step 30000, Minibatch Loss= 0.000017, Training Accuracy= 1.00000
Testing Accuracy: 0.99862
Step 35000, Minibatch Loss= 0.000082, Training Accuracy= 1.00000
Testing Accuracy: 0.99862
Step 40000, Minibatch Loss= 0.000062, Training Accuracy= 1.00000
Testing Accuracy: 0.998584
Step 45000, Minibatch Loss= 0.000015, Training Accuracy= 1.00000
Testing Accuracy: 0.998584
Step 50000, Minibatch Loss= 0.000009, Training Accuracy= 1.00000
Testing Accuracy: 0.99

In [None]:
"" 