In [1]:
# Imports

In [2]:
import pandas as pd
import os
import numpy as np
import math
import time
from sklearn import preprocessing

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model

import string
import re
import argparse

# Raise pandas' display limits so that long / wide result tables are printed in full.
for _option_name, _option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option_name, _option_value)

In [3]:
# Load Label Encodings and Model

In [4]:
def _load_label_encoder(classes_path):
    """Build a LabelEncoder and restore its ``classes_`` array from a ``.npy`` file."""
    encoder = preprocessing.LabelEncoder()
    encoder.classes_ = np.load(classes_path)
    return encoder


# One encoder per model output; their classes_ arrays are used further down to turn
# predicted class positions back into tag strings.
le_pos = _load_label_encoder('./data/le_pos_classes.npy')
le_ner = _load_label_encoder('./data/le_ner_classes.npy')
le_chu = _load_label_encoder('./data/le_chu_classes.npy')

In [4]:
# compile=False: load the saved model without compiling it (the visible cells only
# call model.predict, so no optimizer / loss setup is needed here).
MODEL_DIR = "./data/fitted_model/"
model = load_model(MODEL_DIR, compile=False)

In [5]:
# Make Predictions

In [17]:
# Text to tag. The whole string is passed to the model as-is in a later cell.
input_sentence = (
    "Thousands of demonstrators have marched to Narnia for a protest. "
    "They are mad at the 4 policies of Sauron that they find objectionable."
)



In [18]:
# Split on any run of whitespace. A plain split(" ") would emit empty-string tokens for
# repeated spaces and would not split on tabs/newlines, which inflates sentence_length
# and shifts every following word against its predicted tag.
# NOTE(review): assumes the model tokenizes its raw-string input on whitespace too — confirm.
_punctuation_marks = set(string.punctuation)

# Drop tokens that are a lone punctuation character. Punctuation attached to a word
# (e.g. "protest.") stays part of that word, exactly as before.
sentence_array = [
    token for token in input_sentence.split() if token not in _punctuation_marks
]

# Number of words kept; used later to cut the model's predictions down to the sentence.
sentence_length = len(sentence_array)

In [71]:
# The model receives a batch holding one example: the raw sentence string.
# It returns three outputs, unpacked here as the part-of-speech, named-entity
# and chunk predictions that the following cells decode.
model_input = [[input_sentence]]
prob_pos, prob_ner, prob_chu = model.predict(model_input)

In [72]:
def logits_to_tokens(sequences, index):
    """Map per-position score vectors to their highest-scoring labels.

    Parameters
    ----------
    sequences : iterable of iterables of array-like
        For every sequence, one score vector per position.
    index : sequence
        Lookup from class position to label; ``index[k]`` is the label of class ``k``.

    Returns
    -------
    list of list
        For each input sequence, the label selected (via ``np.argmax``) at each position.
    """
    return [
        [index[np.argmax(scores)] for scores in sequence]
        for sequence in sequences
    ]

In [73]:
# For each output: decode the scores into tag strings, take the first (only) sentence
# of the batch, and keep just the first sentence_length positions.
pos_list = logits_to_tokens(prob_pos, le_pos.classes_)[0][:sentence_length]
ner_list = logits_to_tokens(prob_ner, le_ner.classes_)[0][:sentence_length]
chu_list = logits_to_tokens(prob_chu, le_chu.classes_)[0][:sentence_length]

In [74]:
# Part-of-speech tag -> human-readable description shown in the results table.
# The last entries map punctuation / placeholder tags to themselves.
pos_dict = {
    "CC": "Coordinating Conjunction",
    "CD": "Cardinal Number",
    "DT": "Determiner",
    "EX": "Existential There",  # spelling fixed (was "Existencial")
    "FW": "Foreign Word",
    "IN": "Preposition or Subordinating Conjunction",
    "JJ": "Adjective",
    "JJR": "Adjective, Comparative",
    "JJS": "Adjective, Superlative",
    "LS": "List Item Marker",
    "MD": "Modal",
    "NN": "Noun, Singular or Mass",
    "NNS": "Noun, Plural",
    "NNP": "Proper Noun, Singular",
    "NNPS": "Proper Noun, Plural",
    "PDT": "Predeterminer",
    "POS": "Possessive Ending",
    "PRP": "Personal Pronoun",
    "PRP$": "Possessive Pronoun",
    "RB": "Adverb",
    "RBR": "Adverb, Comparative",
    "RBS": "Adverb, Superlative",
    "RP": "Particle",
    "SYM": "Symbol",
    "TO": "To",
    "UH": "Interjection",
    "VB": "Verb, Base Form",
    "VBD": "Verb, Past Tense",
    "VBG": "Verb, Gerund or Present Participle",  # spelling fixed (was "Pariciple")
    "VBN": "Verb, Past Participle",
    "VBP": "Verb, Non-3rd Person Singular Present",
    "VBZ": "Verb, 3rd Person Singular Present",
    "WDT": "Whdeterminer",
    "WP": "Whpronoun",
    "WP$": "Possessive Whpronoun",
    "WRB": "Whadverb",
    "$": "$",
    "*": "*",
    ',': ',',
    '.': '.',
    ':': ':',
    ';': ';',
    '``': '``',
    'LRB': "Left Parentheses",
    'RRB': "Right Parentheses",
}

# Named-entity tag -> human-readable description shown in the results table.
ner_dict = {
    "geo": "Geographical Entity",
    "org": "Organization",
    "per": "Person",
    "gpe": "Geopolitical Entity",
    "tim": "Time Indicator",
    "art": "Artifact",
    "eve": "Event",
    "nat": "Natural Phenomenon",
    "O": "Not a Named Entity",
    "*": "*",
}

# Chunk tag -> human-readable description shown in the results table.
chu_dict = {
    "B": "Begin Chunk",
    "I": "Inside Chunk",
    "O": "Not a Named Entity",
    "*": "*",
}


In [75]:
# Build one row per word in a single pass instead of filling an empty frame cell by cell.
# - zip() stops at the shortest list, so a prediction list that is shorter than the word
#   list no longer raises IndexError; only the words that have predictions are shown.
# - .get(tag, tag) falls back to the raw tag when it has no description, instead of
#   aborting the whole table with a KeyError.
result_rows = [
    (
        word,
        pos_dict.get(pos_tag, pos_tag),
        ner_dict.get(ner_tag, ner_tag),
        chu_dict.get(chu_tag, chu_tag),
    )
    for word, pos_tag, ner_tag, chu_tag in zip(sentence_array, pos_list, ner_list, chu_list)
]

# Integer row labels 0..n-1, one per displayed word.
indices = np.arange(0, len(result_rows))
results = pd.DataFrame(
    result_rows,
    columns=["-Word-", "-Part-of-Speech-", "-Named Entity-", "-Beginning or Inside Chunk-"],
    index=indices,
)
print(results)

              Word                           Part-of-Speech  \
0        Thousands                             Noun, Plural   
1               of  Prepositon or Subordinating Conjunction   
2    demonstrators                             Noun, Plural   
3             have    Verb, Non-3rd Person Singular Present   
4          marched                    Verb, Past Participle   
5               to                                       To   
6           Narnia                          Verb, Base Form   
7              for  Prepositon or Subordinating Conjunction   
8                a                               Determiner   
9         protest.                   Noun, Singular or Mass   
10            They                         Personal Pronoun   
11             are    Verb, Non-3rd Person Singular Present   
12             mad                    Verb, Past Participle   
13              at  Prepositon or Subordinating Conjunction   
14             the                               Determ