# Model Testing
> Final testing results for best models

In [1]:
%load_ext autoreload
%autoreload 2
from importlib import reload

import numpy as np
#import time # !
import json
#from matplotlib import pyplot as plt

from loadutils import retrieve_decoder_model, retrieve_model
import evaluation_helper as eh
import tensorflow as tf
import pandas as pd
from keras.utils import to_categorical

import glove_helper
from loadutils import conll2003Data, saveProcessedData, retrieve_model
from common import vocabulary, utils


Using TensorFlow backend.


In [2]:
# Disabled alternative: CoNLL-2003 data (NeuroNER copy, `en` directory).
# TRAIN_FILE = "../data/CoNLL-2003_NeuroNER/en/train.txt"
# DEV_FILE = "../data/CoNLL-2003_NeuroNER/en/valid.txt"
# TEST_FILE = "../data/CoNLL-2003_NeuroNER/en/test.txt"

# Active dataset: POS-tagging data from the `es` directory, one file per split.
TRAIN_FILE = "../data/pos_tagging/es/train.txt"
DEV_FILE = "../data/pos_tagging/es/dev.txt"
TEST_FILE = "../data/pos_tagging/es/test.txt"

# Disabled alternative: point all three splits at train.txt (evaluates on the training data itself).
# TRAIN_FILE = "../data/pos_tagging/es/train.txt"
# DEV_FILE = "../data/pos_tagging/es/train.txt"
# TEST_FILE = "../data/pos_tagging/es/train.txt"

## Local helper utils

In [3]:
def construct_embedding_matrix(embed_dim, vocab_size, vocab=None):
    """
    Construct an embedding matrix from the pretrained word vectors served by
    glove_helper.Hands (glove_helper code reused from w266).

    Row i holds the pretrained vector of the word whose vocabulary id is i.
    A word whose lookup raises falls back to the vector of "<unk>".
    Rows with no corresponding vocabulary id are left as zeros.

    Args:
        embed_dim: dimensionality of the pretrained vectors (passed to Hands as ndim)
        vocab_size: number of rows of the returned matrix
        vocab: vocabulary object exposing `size` and `ids_to_words`;
               defaults to the notebook-global `vocabData.vocab`

    Returns: an embedding matrix of shape (vocab_size, embed_dim), directly
    plugged into keras.layers.Embedding(weights=[embedding_matrix])
    """
    if vocab is None:
        # backward-compatible default: fall back to the notebook-global data object
        vocab = vocabData.vocab

    reload(glove_helper)
    hands = glove_helper.Hands(ndim=embed_dim)
    embedding_matrix = np.zeros((vocab_size, embed_dim))

    # Only fill rows that exist in BOTH the matrix and the vocabulary; the
    # original looped over the vocabulary size alone and could index past
    # the allocated rows when the vocabulary was larger than vocab_size.
    n_rows = min(vocab_size, vocab.size)
    for i in range(n_rows):
        word = vocab.ids_to_words([i])[0]
        try:
            embedding_vector = hands.get_vector(word)
        except Exception:
            # word has no pretrained vector; `except Exception` (not a bare
            # except) so that Ctrl-C / SystemExit still propagate
            embedding_vector = hands.get_vector("<unk>")
        embedding_matrix[i] = embedding_vector

    return embedding_matrix

## Load the Data

In [4]:
# Vocabulary cap and context-window width shared by every split
global_max_features = 20000
windowLength = 11

# The vocabulary is built from the TRAINING file only
vocabData = conll2003Data(TRAIN_FILE)
vocabData.buildVocab(vocabSize=global_max_features)

# Training windows come from the sentences already held by vocabData
trainX, trainX_pos, trainX_capitals, trainY = vocabData.formatWindowedData(
    vocabData.train_sentences, windowLength=windowLength, verbose=False)

# Dev split: read the file, then window it with the same settings
devSents = vocabData.readFile(DEV_FILE)
devX, devX_pos, devX_capitals, devY = vocabData.formatWindowedData(
    devSents, windowLength=windowLength, verbose=False)

# Test split: read the file, then window it with the same settings
testSents = vocabData.readFile(TEST_FILE)
testX, testX_pos, testX_capitals, testY = vocabData.formatWindowedData(
    testSents, windowLength=windowLength, verbose=False)

----------------------------------------------------
reading file from path ../data/pos_tagging/es/train.txt
'readFile'  1827.95 ms
----------------------------------------------------
building vocabulary from TRAINING data...
'buildVocab'  1433.36 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  2989.05 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/dev.txt
'readFile'  187.33 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  425.49 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/test.txt
'readFile'  174.80 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  469.07 ms


In [5]:
# Build the pretrained embedding matrix for the training vocabulary.
# Vector width must match the pretrained file (300 here; 50 was used previously).
global_embed_dim = 300

embedding_matrix = construct_embedding_matrix(
    embed_dim=global_embed_dim,
    vocab_size=global_max_features,
)

Loading vectors from data/es/wiki.es.zip
Parsing file: data/es/wiki.es.zip:wiki.es.vec
Found 985,668 words.
Parsing vectors... Done! (W.shape = (985671, 300))


In [7]:
# AZ: sanity check. Without `num_classes`, to_categorical sizes the one-hot
# width from the labels present in the array it is given, so testY and devY
# can end up with different widths (different number of labels per data set?).

to_categorical(testY.astype('float32')).shape

(356738, 219)

In [8]:
# Get Y

# One-hot encode the targets. The class count is taken from the training
# labels and reused for dev/test so all three splits share the same width.
trainY_cat = to_categorical(trainY.astype('float32'))
devY_cat = to_categorical(devY.astype('float32'), num_classes=trainY_cat.shape[1])
testY_cat = to_categorical(testY.astype('float32'), num_classes=trainY_cat.shape[1])

# Drop the first 3 one-hot columns (ids 0-2; presumably reserved
# special-token ids -- TODO confirm against the vocabulary).
# `np.float` was removed in NumPy 1.24; the builtin `float` yields the same
# float64 dtype, and a single slice replaces the per-row map/lambda copy.
trainY_cat = trainY_cat[:, 3:].astype(float)
devY_cat = devY_cat[:, 3:].astype(float)
testY_cat = testY_cat[:, 3:].astype(float)

In [9]:
# AZ: with num_classes pinned to the training width (and the first 3 columns
# dropped), testY_cat and devY_cat now have the same number of columns as
# trainY_cat; this causes problems when constructing the confusion matrix.

testY_cat.shape

(44563, 208)

In [10]:
# Get decoder Y -- pretrained embedding (global_embed_dim wide) of the center word
#
# The center column is derived from windowLength instead of being hard-coded:
# the previous literal 4 is only the center of a 9-wide window, while this
# notebook uses windowLength = 11 (center index 5).
# NOTE(review): if a decoder model was trained against column 4, retrain or
# confirm before comparing decoder outputs with these targets.
centerIdx = windowLength // 2

train_decoderY = embedding_matrix[trainX[:, centerIdx]]
dev_decoderY = embedding_matrix[devX[:, centerIdx]]
test_decoderY = embedding_matrix[testX[:, centerIdx]]

In [11]:
# Get X pos tags

# One-hot encode the POS tag ids of every window position.
# NOTE(review): dev/test widths are inferred from each split's own data
# (num_classes is not pinned to the training width) -- confirm the widths
# match what a model using "x_pos" expects.
trainX_pos_cat = to_categorical(trainX_pos.astype('float32'))
devX_pos_cat = to_categorical(devX_pos.astype('float32'))#, num_classes=trainX_pos_cat.shape[2])
testX_pos_cat = to_categorical(testX_pos.astype('float32'))#, num_classes=trainX_pos_cat.shape[2])

# Drop the first 3 one-hot columns on the last axis (ids 0-2; presumably
# reserved special-token ids -- TODO confirm).
# `np.float` was removed in NumPy 1.24; the builtin `float` yields the same
# float64 dtype, and a single slice replaces the per-window map/lambda copy.
trainX_pos_cat = trainX_pos_cat[:, :, 3:].astype(float)
devX_pos_cat = devX_pos_cat[:, :, 3:].astype(float)
testX_pos_cat = testX_pos_cat[:, :, 3:].astype(float)

In [12]:
# Get X capitalization

# One-hot encode the capitalization info ("allCaps", "upperInitial", "lowercase", "mixedCaps", "noinfo")
# of every window position.
# NOTE(review): dev/test widths are inferred from each split's own data
# (num_classes is not pinned to the training width) -- confirm the widths
# match what a model using "x_capital" expects.
trainX_capitals_cat = to_categorical(trainX_capitals.astype('float32'))
devX_capitals_cat = to_categorical(devX_capitals.astype('float32'))#, num_classes=trainX_capitals_cat.shape[2])
testX_capitals_cat = to_categorical(testX_capitals.astype('float32'))#, num_classes=trainX_capitals_cat.shape[2])

# Drop the first 3 one-hot columns on the last axis (ids 0-2; presumably
# reserved special-token ids -- TODO confirm).
# `np.float` was removed in NumPy 1.24; the builtin `float` yields the same
# float64 dtype, and a single slice replaces the per-window map/lambda copy.
trainX_capitals_cat = trainX_capitals_cat[:, :, 3:].astype(float)
devX_capitals_cat = devX_capitals_cat[:, :, 3:].astype(float)
testX_capitals_cat = testX_capitals_cat[:, :, 3:].astype(float)

## Final Models

In [13]:
# Inputs handed to model.predict(), one entry per input name
devX_dict = {
    "x": devX,
    "x_pos": devX_pos_cat,
    "x_capital": devX_capitals_cat,
}
testX_dict = {
    "x": testX,
    "x_pos": testX_pos_cat,
    "x_capital": testX_capitals_cat,
}

# Options passed to the model-retrieval helpers
modelDir = {"save_dir": "result"}

### Model A

In [15]:
# model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
# Evaluate one saved model on the test split: load it, predict, then report
# precision / recall / F1 and the confusion matrix.
# The commented-out modelName lines are alternative saved models.
# modelName = "Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps"
modelName = "glove_nolearn_base"
# modelName = "glove_learn_dropout_pos_caps"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

# raw network output for the test inputs, then the helper's conversion of it
# (in the recorded run y_pred has the same shape as testY)
raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))
# debugging aid: show that testY and y_pred are same-shaped arrays before
# the confusion matrix is built
print("type(testY):\t", type(testY))
print("testY.shape:\t", testY.shape)
print("type(y_pred):\t", type(y_pred))
print("y_pred.shape:\t",y_pred.shape)
# NOTE(review): in the recorded run the cell ends with
# "ValueError: Shape of passed values is (196, 196), indices imply (216, 216)",
# which appears to come from this call -- the label set used for the matrix
# values and the one used for its index seem to differ; needs a fix in
# evaluation_helper before this cell can run to completion.
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
cm

W0326 18:17:08.661491 140235734583104 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0326 18:17:08.665400 140235734583104 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0326 18:17:08.779669 140235734583104 deprecation.py:506] From /home/andrew/Documents/CapsuleNetworks/CapsNet_for_NER/code/capsulelayers.py:136: calling softmax (from tensorflow.python.ops.nn_ops) with dim is deprecated and will be removed in a future version.
Instructions for updating:
dim is deprecated, use axis instead


Retrieving model: glove_nolearn_base


W0326 18:17:08.902313 140235734583104 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0326 18:17:08.902991 140235734583104 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W0326 18:17:08.903574 140235734583104 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:186: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

W0326 18:17:08.932119 140235734583104 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

W0326 18:17:08.933122 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           6000000   
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)       (None, 216, 16)           6193152   
__________

  gold_idx_dict[gold] = np.hstack(self.gold_pred_idx_dict[gold].values())
W0326 18:21:09.344893 140235734583104 module_wrapper.py:139] From /home/andrew/Documents/CapsuleNetworks/CapsNet_for_NER/code/evaluation_helper.py:308: The name tf.confusion_matrix is deprecated. Please use tf.math.confusion_matrix instead.



Precision 0.9334898487314249
Recall	 0.9321221162555278
F1	 0.9328054811319497
type(testY):	 <class 'numpy.ndarray'>
testY.shape:	 (44843,)
type(y_pred):	 <class 'numpy.ndarray'>
y_pred.shape:	 (44843,)


ValueError: Shape of passed values is (196, 196), indices imply (216, 216)

### Model B (Model A + Decoder)

In [None]:
# model B - Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps
# Same evaluation as Model A, but the model is loaded through
# retrieve_decoder_model and predict() returns two outputs.
# NOTE(review): modelName is the same "glove_nolearn_base" used for Model A;
# confirm a decoder variant is actually saved under that name.
# modelName = "Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps"
modelName = "glove_nolearn_base"
myModel = retrieve_decoder_model( modelName, modelDir)
myModel.summary()

# first output feeds the label evaluation; the second output is not used
# further in this cell
raw_y_pred, raw_eval_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))
# NOTE(review): the same call in the Model A cell ended in a ValueError in
# the recorded run; this cell has no recorded output.
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
cm

### CNN

In [None]:
# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
# Same evaluation procedure as the Model A cell.
# NOTE(review): modelName below is the same "glove_nolearn_base" loaded in
# the Model A cell, so as written this cell re-evaluates that saved model;
# restore the CNN model name if a separate CNN result is intended.
# modelName = "Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps"
modelName = "glove_nolearn_base"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

# raw network output for the test inputs, then the helper's conversion of it
raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))
# NOTE(review): the same call in the Model A cell ended in a ValueError in
# the recorded run; this cell has no recorded output.
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
cm