In [1]:
import pos
import numpy as np
import tensorflow as tf
from cnn_seq_label import Tagger
from sklearn.metrics import classification_report


SEQ_LEN = 20
BATCH_SIZE = 128
NUM_EPOCH = 1
sample = ['I', 'love', 'you']


def to_train_seq(*args):
    data = []
    for x in args:
        data.append(iter_seq(x))
    return data


def to_test_seq(*args):
    data = []
    for x in args:
        x = x[: (len(x) - len(x) % SEQ_LEN)]
        data.append(np.reshape(x, [-1, SEQ_LEN]))
    return data


def iter_seq(x, text_iter_step=1):
    return np.array([x[i : i+SEQ_LEN] for i in range(0, len(x)-SEQ_LEN, text_iter_step)])


if __name__ == '__main__':
    x_train, y_train, x_test, y_test, vocab_size, n_class, word2idx, tag2idx = pos.load_data()
    X_train, Y_train = to_train_seq(x_train, y_train)
    X_test, Y_test = to_test_seq(x_test, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    clf = Tagger(vocab_size, n_class, SEQ_LEN)
    clf.fit(X_train, Y_train, n_epoch=NUM_EPOCH, batch_size=BATCH_SIZE)
    
    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    print(classification_report(Y_test.ravel(), y_pred.ravel(), target_names=tag2idx.keys()))

    idx2tag = {idx : tag for tag, idx in tag2idx.items()}
    _test = [word2idx[w] for w in sample] + [0] * (SEQ_LEN-len(sample))
    labels = clf.infer(_test, len(sample))
    print(' '.join(sample))
    print(' '.join([idx2tag[idx] for idx in labels if idx != 0]))


Vocab Size: 19124 | x_train: 211727 | x_test: 47377
(211707, 20) (211707, 20) (2368, 20) (2368, 20)
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Data Shuffled
Epoch 1/1 | Step 0/1653 | train_loss: 75.9255 | train_acc: 0.0348 | lr: 0.0050
Epoch 1/1 | Step 50/1653 | train_loss: 3.5961 | train_acc: 0.9508 | lr: 0.0047
Epoch 1/1 | Step 100/1653 | train_loss: 1.2834 | train_acc: 0.9789 | lr: 0.0044
Epoch 1/1 | Step 150/1653 | train_loss: 0.7434 | train_acc: 0.9898 | lr: 0.0041
Epoch 1/1 | Step 200/1653 | train_loss: 0.6255 | train_acc: 0.9918 | lr: 0.0038
Epoch 1/1 | Step 250/1653 | train_loss: 0.2492 | train_acc: 0.9957 | lr: 0.0035
Epoch 1/1 | Step 300/1653 | train_loss: 0.4201 | train_acc: 0.9926 | lr: 0.0033
Epoch 1/1 | Step 350/1653 | train_loss: 0.1966 | train_acc: 0.9965 | lr: 0.0031
Epoch 1/1 | Step 400/1653 | train_loss: 0.1028 | train_acc: 0.9988 | lr: 0.0029
Epoch 1/1 | Step 450/1653 | train_loss: 0.2348 | train_acc: 0.9961 | lr: 0.0027
Epoch 1/1 | Step 500/1653 | train_loss: 0.1731 | train_acc: 0.9969 | lr: 0.0025
Epoch 1/1 | Step 550/1653 | train_loss: 0.1469 | train_acc: 0.9969 | lr: 0.0023
Epoch 1/1 | Step 600/1653 | 

  .format(len(labels), len(target_names))
