In [1]:
from model_v2 import build_lm_classifier_inference, LSTM_SAVED_STATE
from utils import get_batch_classifier_inference, clean_text
import json
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [2]:
# Read the two JSON config files stored next to the checkpoints: one for the
# language model and one for the classifier built on top of it.
with open('102_sa/checkpoints/model_configs.json', 'r') as lm_config_file:
    lm_params = json.load(lm_config_file)
with open('102_sa/checkpoints/classifier_configs.json', 'r') as cls_config_file:
    cls_params = json.load(cls_config_file)

# Build the inference objects from those configs. is_cpu=True follows the
# original note for this cell ("Load in CPU").
language_model, classifier = build_lm_classifier_inference(
    lm_params,
    cls_params,
    is_cpu=True,
)

In [3]:
# Open a TF1-style session. allow_growth makes TensorFlow claim GPU memory
# incrementally instead of reserving all of it when the session starts.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

# Restore every global variable except the ones registered under the
# LSTM_SAVED_STATE collection (presumably recurrent state that is not stored
# in the checkpoint -- confirm against model_v2).
# The collection is fetched once up front rather than once per variable.
lstm_state_vars = tf.get_collection(LSTM_SAVED_STATE)
restorable_vars = [var for var in tf.global_variables() if var not in lstm_state_vars]
saver = tf.train.Saver(restorable_vars)
saver.restore(session, '102_sa/checkpoints/final_classifier_cpu/model.cpkt-4500')

INFO:tensorflow:Restoring parameters from 102_sa/checkpoints/final_classifier_cpu/model.cpkt-4500


In [4]:
# Load the word and character vocabularies. The encoding is pinned to UTF-8
# (the standard JSON encoding) so the result does not depend on the platform's
# default locale encoding.
with open('SentimentAnalysis/word2idx.json', 'r', encoding='utf-8') as inp:
    word2idx = json.load(inp)
with open('SentimentAnalysis/char2idx.json', 'r', encoding='utf-8') as inp:
    char2idx = json.load(inp)

# For every vocabulary word, pre-compute the list of its character ids.
# Raises KeyError if a word contains a character missing from char2idx.
word2char = {word: [char2idx[ch] for ch in word] for word in word2idx}

In [5]:
def predict(texts, bsz=32, bptt=20):
    """Run the classifier on raw texts and return its ``probs`` output.

    Each text is stripped, passed through ``clean_text`` and split on
    whitespace. Every token is replaced by its list of character ids from
    ``word2char``; tokens missing from ``word2char`` use the ``'<UNK>'`` entry.

    Args:
        texts: Iterable of raw strings.
        bsz: Batch size forwarded to ``get_batch_classifier_inference``.
        bptt: Value fed to ``language_model.bptt`` for every batch
            (previously hard-coded to 20).

    Returns:
        The per-batch values of ``classifier.probs`` concatenated along
        axis 0 (presumably one row per input text -- confirm against the
        batching helper).

    Raises:
        KeyError: If ``word2char`` has no ``'<UNK>'`` entry.
        ValueError: From ``np.concatenate`` when no batch was produced
            (e.g. presumably when ``texts`` is empty).
    """
    # Look the fallback up once instead of once per token.
    unk_chars = word2char['<UNK>']
    encoded = [
        [word2char.get(token, unk_chars) for token in clean_text(raw.strip()).split()]
        for raw in texts
    ]

    # Sentences have different word counts and words have different character
    # counts, so np.array(encoded) would build a ragged array, which is an
    # error since NumPy 1.24. Fill a 1-D object array explicitly, one encoded
    # sentence per slot; this is the layout older NumPy produced for ragged
    # input.
    sentences = np.empty(len(encoded), dtype=object)
    for idx, sentence in enumerate(encoded):
        sentences[idx] = sentence

    results = []
    for chars, lens, char_lens in get_batch_classifier_inference(sentences, bsz):
        probs = session.run(classifier.probs, feed_dict={
            language_model.inputs: chars,
            language_model.seq_lens: lens,
            language_model.char_lens: char_lens,
            language_model.bptt: bptt,
        })
        results.append(probs)
    return np.concatenate(results, axis=0)

In [6]:
# Class index assigned to each sentiment tag found in the answer file.
label2idx = {'POS': 2, 'NEU': 1, 'NEG': 0}

# The file is read as alternating lines: a document, then its tag.
# The encoding is pinned to UTF-8 so the text does not depend on the
# platform's default locale encoding.
with open('SentimentAnalysis/test_raw_ANS.txt', 'r', encoding='utf-8') as inp:
    lines = inp.readlines()

test_data = lines[::2]
# Raises KeyError if a tag line is not one of the keys of label2idx.
test_label = [label2idx[tag.strip()] for tag in lines[1::2]]

# A stray or missing line would silently misalign texts and labels.
assert len(test_data) == len(test_label), (
    'expected alternating text/label lines, got %d texts and %d labels'
    % (len(test_data), len(test_label))
)

In [7]:
# Score the whole test set, then keep for each row the index of the largest
# value along axis 1 of the classifier output.
test_probs = predict(test_data)
preds = test_probs.argmax(axis=1)

In [8]:
from sklearn.metrics import classification_report

In [9]:
# Precision / recall / F1 per class; rows are labelled with the integer class
# ids that test_label was built from (see label2idx).
report = classification_report(y_true=test_label, y_pred=preds)
print(report)

              precision    recall  f1-score   support

           0       0.70      0.70      0.70       350
           1       0.65      0.65      0.65       350
           2       0.76      0.75      0.75       350

   micro avg       0.70      0.70      0.70      1050
   macro avg       0.70      0.70      0.70      1050
weighted avg       0.70      0.70      0.70      1050

