In [1]:
from model_v2 import build_lm_classifier_inference, LSTM_SAVED_STATE
from utils import get_batch_classifier_inference, clean_text_v3 as clean_text
import json
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [2]:
def load_graph(frozen_graph_filename):
    """Read a frozen TensorFlow graph from disk and import it into a new graph.

    Args:
        frozen_graph_filename: Path of the serialized ``GraphDef`` protobuf.

    Returns:
        A fresh ``tf.Graph`` containing the imported nodes. They are imported
        under the name scope ``prefix``, so tensors are looked up as
        ``prefix/<original name>``.
    """
    # Deserialize the protobuf bytes into a GraphDef message.
    restored_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, "rb") as pb_file:
        restored_def.ParseFromString(pb_file.read())

    # Import into a dedicated graph instead of the process-wide default one.
    target_graph = tf.Graph()
    with target_graph.as_default():
        # `name` is the scope under which every imported op is placed.
        tf.import_graph_def(restored_def, name="prefix")
    return target_graph

In [3]:
# Location of the frozen classifier graph that the rest of the notebook uses.
frozen_graph_path = "106_sa/checkpoints/class_cpu/frozen.pb-4500"
graph = load_graph(frozen_graph_path)

In [4]:
# Look up the tensors that are fed (the four LanguageModel tensors) and the
# tensor that is fetched (the Classifier softmax) by their names in the
# imported graph. Order of the names matches the order of the targets.
inputs, seq_lens, char_lens, bptt, predict_prob = [
    graph.get_tensor_by_name(tensor_name)
    for tensor_name in (
        'prefix/LanguageModel/fw_inputs:0',
        'prefix/LanguageModel/seq_lens:0',
        'prefix/LanguageModel/fw_char_lens:0',
        'prefix/LanguageModel/bptt:0',
        'prefix/Classifier/Softmax:0',
    )
]

In [5]:
# Enable `allow_growth` on the GPU options, then open a session that is
# bound to the imported graph (not the default graph).
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(graph=graph, config=config)

In [6]:
# Load the word and character vocabularies (token -> integer id).
# JSON text is UTF-8 by specification; passing the encoding explicitly keeps
# loading independent of the platform's default locale encoding.
with open('106_sa/word2idx.json', 'r', encoding='utf-8') as inp:
    word2idx = json.load(inp)
with open('106_sa/char2idx.json', 'r', encoding='utf-8') as inp:
    char2idx = json.load(inp)

# Map every vocabulary word to the list of ids of its characters.
# Raises KeyError if a word contains a character missing from char2idx.
word2char = {w: [char2idx[c] for c in w] for w in word2idx}

In [7]:
def predict(texts, bsz=32, bptt_len=20):
    """Run the frozen classifier on raw texts and return its softmax output.

    Args:
        texts: Iterable of raw strings. Each one is stripped and passed
            through ``clean_text``; the result is iterated token by token.
        bsz: Batch size handed to ``get_batch_classifier_inference``.
        bptt_len: Value fed to the graph's ``bptt`` tensor. Defaults to 20,
            the value that used to be hard-coded.

    Returns:
        The per-batch results of ``predict_prob`` concatenated along axis 0.

    Raises:
        ValueError: If no batch was produced (for example ``texts`` is empty).
    """
    cleaned = [clean_text(x.strip()) for x in texts]

    # Tokens missing from the vocabulary fall back to the '<UNK>' entry.
    unk_chars = word2char['<UNK>']
    encoded = [[word2char.get(w, unk_chars) for w in sent] for sent in cleaned]

    # Sentences and words have varying lengths. Calling np.array() on such
    # ragged nested lists raises ValueError on NumPy >= 1.24 and produces an
    # input-dependent shape on older versions, so build a 1-D object array
    # with one Python list per sentence explicitly.
    sentences = np.empty(len(encoded), dtype=object)
    for i, sent in enumerate(encoded):
        sentences[i] = sent

    results = []
    for chars, lens, cl in get_batch_classifier_inference(sentences, bsz):
        probs = session.run(predict_prob, feed_dict={
            inputs: chars, seq_lens: lens,
            char_lens: cl, bptt: bptt_len
        })
        results.append(probs)

    if not results:
        # np.concatenate([]) would raise an opaque ValueError; keep the same
        # exception type but say what actually went wrong.
        raise ValueError("predict() produced no batches; was `texts` empty?")
    return np.concatenate(results, axis=0)

In [8]:
label2idx = {'POS': 2, 'NEU': 1, 'NEG': 0}

# The file is read as alternating lines: even-indexed lines are the texts,
# odd-indexed lines are their labels (keys of label2idx).
# The encoding is passed explicitly so decoding does not depend on the
# platform locale -- NOTE(review): assumes the file is UTF-8; confirm.
with open('SentimentAnalysis/test_raw_ANS.txt', 'r', encoding='utf-8') as inp:
    lines = inp.readlines()

test_data = lines[::2]
test_label = [label2idx[x.strip()] for x in lines[1::2]]

# An odd number of lines would leave a text without a label and only fail
# later, inside the metrics call; fail here with a clear message instead.
assert len(test_data) == len(test_label), (
    "test file has %d text lines but %d label lines"
    % (len(test_data), len(test_label))
)

In [9]:
# Score every test text with the default batch size; one output row per text
# (presumably class probabilities, since the fetched tensor is a Softmax).
preds = predict(texts=test_data)

In [10]:
from sklearn.metrics import classification_report

In [11]:
# Predicted class = column index with the highest score in each row;
# indices follow label2idx (0 = NEG, 1 = NEU, 2 = POS).
predicted_label = np.argmax(preds, axis=1)
report = classification_report(y_true=test_label, y_pred=predicted_label, digits=4)
print(report)

              precision    recall  f1-score   support

           0     0.6834    0.7400    0.7106       350
           1     0.6725    0.6571    0.6647       350
           2     0.7872    0.7400    0.7629       350

   micro avg     0.7124    0.7124    0.7124      1050
   macro avg     0.7144    0.7124    0.7127      1050
weighted avg     0.7144    0.7124    0.7127      1050

