# Model Testing
> Final testing results for best models

In [1]:
%load_ext autoreload
%autoreload 2
from importlib import reload

import numpy as np
#import time # !
import json
#from matplotlib import pyplot as plt

from loadutils import retrieve_decoder_model, retrieve_model
import evaluation_helper as eh
import tensorflow as tf
import pandas as pd
from keras.utils import to_categorical

import glove_helper
from loadutils import conll2003Data, saveProcessedData, retrieve_model
from common import vocabulary, utils


Using TensorFlow backend.


In [2]:
# Dataset selection. Alternative corpora are kept as commented groups; only
# one TRAIN/DEV/TEST group should be active at a time.

# -- CoNLL-2003 English (NeuroNER layout)
# TRAIN_FILE = "../data/CoNLL-2003_NeuroNER/en/train.txt"
# DEV_FILE = "../data/CoNLL-2003_NeuroNER/en/valid.txt"
# TEST_FILE = "../data/CoNLL-2003_NeuroNER/en/test.txt"

# -- Spanish POS tagging (active)
# NOTE(review): train, dev and test all resolve to the same train.txt, so every
# "test" metric computed further down is measured on the training file --
# confirm this is intended before reporting final numbers.
ES_TRAIN_PATH = "../data/pos_tagging/es/train.txt"
TRAIN_FILE = ES_TRAIN_PATH
DEV_FILE = ES_TRAIN_PATH
TEST_FILE = ES_TRAIN_PATH

# -- Spanish POS tagging, combined splits
# TRAIN_FILE = "../data/pos_tagging/es/train_combined.txt"
# DEV_FILE = "../data/pos_tagging/es/dev_combined.txt"
# TEST_FILE = "../data/pos_tagging/es/test_combined.txt"

## Local helper utils

In [3]:
def construct_embedding_matrix(embed_dim, vocab_size, vocab=None):
    """
    Build a pretrained embedding matrix for the vocabulary.

    Word vectors are looked up through glove_helper.Hands(ndim=embed_dim)
    (helper code reused from w266). Row i of the result holds the vector of
    the word whose id is i; when the lookup for a word raises, the vector of
    "<unk>" is used instead.

    Args:
        embed_dim: vector dimensionality, forwarded to Hands as `ndim`.
        vocab_size: number of rows of the returned matrix.
        vocab: vocabulary object exposing `.size` and `.ids_to_words(ids)`.
            Defaults to the notebook-level `vocabData.vocab`.

    Returns: an embedding matrix of shape (vocab_size, embed_dim) that can be
    plugged directly into keras.layers.Embedding(weights=[embedding_matrix]).
    Rows with an id >= vocab.size are left as zeros.
    """
    if vocab is None:
        # Fall back to the notebook global, as the original implementation did.
        vocab = vocabData.vocab

    reload(glove_helper)
    hands = glove_helper.Hands(ndim=embed_dim)
    embedding_matrix = np.zeros((vocab_size, embed_dim))

    for i in range(vocab.size):
        word = vocab.ids_to_words([i])[0]
        try:
            embedding_vector = hands.get_vector(word)
        except Exception:
            # Word missing from the pretrained vectors. `Exception` (rather
            # than a bare `except`) keeps Ctrl-C / SystemExit working during
            # this potentially long loop.
            embedding_vector = hands.get_vector("<unk>")
        embedding_matrix[i] = embedding_vector

    return embedding_matrix

## Load the Data

In [4]:
# UPDATES!
global_max_features = 20000
windowLength = 11
#testNumSents = 20000

# The vocabulary is built from the training file only.
vocabData = conll2003Data(TRAIN_FILE)
vocabData.buildVocab(vocabSize=global_max_features)

# Every split is windowed with the same settings.
window_kwargs = dict(windowLength=windowLength, verbose=False)

# Training windows come from the sentences already held by vocabData.
trainX, trainX_pos, trainX_capitals, trainY = vocabData.formatWindowedData(
    vocabData.train_sentences, **window_kwargs)

# Dev split: read the file, then window it.
devSents = vocabData.readFile(DEV_FILE)
devX, devX_pos, devX_capitals, devY = vocabData.formatWindowedData(
    devSents, **window_kwargs)

# Test split: read the file, then window it.
testSents = vocabData.readFile(TEST_FILE)
testX, testX_pos, testX_capitals, testY = vocabData.formatWindowedData(
    testSents, **window_kwargs)

----------------------------------------------------
reading file from path ../data/pos_tagging/es/train.txt
'readFile'  1920.69 ms
----------------------------------------------------
building vocabulary from TRAINING data...
'buildVocab'  1844.74 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  3862.02 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/train.txt
'readFile'  1791.36 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  3007.44 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/train.txt
'readFile'  1551.35 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  3333.53 ms


In [5]:
# Build the pretrained embedding matrix for the vocabulary.

# global_embed_dim = 50
global_embed_dim = 300

embedding_matrix = construct_embedding_matrix(
    embed_dim=global_embed_dim,
    vocab_size=global_max_features,
)

Loading vectors from data/es/wiki.es.zip
Parsing file: data/es/wiki.es.zip:wiki.es.vec
Found 985,668 words.
Parsing vectors... Done! (W.shape = (985671, 300))


In [6]:
# Get Y

# One-hot encode the targets. The class count is taken from the training
# labels and reused for dev/test so that all three arrays have the same width.
# NOTE(review): the splits are not concatenated first, so a label id that only
# occurs in dev/test and exceeds the training maximum would make
# to_categorical fail -- confirm the training split covers every label.
trainY_cat = to_categorical(trainY.astype('float32'))
devY_cat = to_categorical(devY.astype('float32'), num_classes=trainY_cat.shape[1])
testY_cat = to_categorical(testY.astype('float32'), num_classes=trainY_cat.shape[1])

# Drop the first three class columns (presumably reserved ids such as
# padding/unknown -- TODO confirm) and store as float64.
# A plain slice replaces the per-row list(map(lambda ...)) copy, and
# np.float64 replaces the `np.float` alias, which was removed in NumPy 1.24.
trainY_cat = trainY_cat[:, 3:].astype(np.float64)
devY_cat = devY_cat[:, 3:].astype(np.float64)
testY_cat = testY_cat[:, 3:].astype(np.float64)

In [7]:
# Get decoder Y -- pretrained embedding (embedding_matrix row) of each
# window's center word.
# The center column is derived from windowLength instead of the previously
# hard-coded 4, which is only the center of a 9-token window; with
# windowLength = 11 the center is index 5.
# NOTE(review): assumes formatWindowedData lays windows out symmetrically
# around the target token -- confirm.
center_idx = windowLength // 2
train_decoderY = embedding_matrix[trainX[:, center_idx]]
dev_decoderY = embedding_matrix[devX[:, center_idx]]
test_decoderY = embedding_matrix[testX[:, center_idx]]

In [9]:
# Get X pos tags

# One-hot encode the POS-tag windows. The width of the last axis is fixed by
# the training split and reused for dev/test.
trainX_pos_cat = to_categorical(trainX_pos.astype('float32'))
n_pos_classes = trainX_pos_cat.shape[2]
devX_pos_cat = to_categorical(devX_pos.astype('float32'), num_classes=n_pos_classes)
testX_pos_cat = to_categorical(testX_pos.astype('float32'), num_classes=n_pos_classes)

In [None]:
## AZ: this causes the kernel to die sometimes...
# Drop the first three one-hot columns on the last axis (presumably reserved
# ids -- TODO confirm) and store as float64.
# One slice per array replaces the list(map(lambda ...)) version, which built
# a Python list holding a separate float64 array per window before stacking
# them again. np.float64 replaces the `np.float` alias removed in NumPy 1.24.
# Caution: this cell rebinds the *_pos_cat names, so re-running it without
# first re-running the one-hot cell strips another three columns.
trainX_pos_cat = trainX_pos_cat[:, :, 3:].astype(np.float64)
devX_pos_cat = devX_pos_cat[:, :, 3:].astype(np.float64)
testX_pos_cat = testX_pos_cat[:, :, 3:].astype(np.float64)

In [None]:
# Get X capitalization

# One-hot encode the capitalization info ("allCaps", "upperInitial",
# "lowercase", "mixedCaps", "noinfo"). The width of the last axis is fixed by
# the training split and reused for dev/test.
# (The dev/test lines previously had a stray ")" after astype(...), which made
# this cell a SyntaxError.)
trainX_capitals_cat = to_categorical(trainX_capitals.astype('float32'))
devX_capitals_cat = to_categorical(devX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])
testX_capitals_cat = to_categorical(testX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])

# Drop the first three one-hot columns on the last axis (presumably reserved
# ids -- TODO confirm) and store as float64. A slice replaces the per-window
# list(map(lambda ...)) copy; np.float64 replaces the `np.float` alias that
# was removed in NumPy 1.24.
trainX_capitals_cat = trainX_capitals_cat[:, :, 3:].astype(np.float64)
devX_capitals_cat = devX_capitals_cat[:, :, 3:].astype(np.float64)
testX_capitals_cat = testX_capitals_cat[:, :, 3:].astype(np.float64)

## Final Models

In [None]:
## AZ: model inputs keyed by input name, including the extra features
devX_dict = dict(x=devX, x_pos=devX_pos_cat, x_capital=devX_capitals_cat)
testX_dict = dict(x=testX, x_pos=testX_pos_cat, x_capital=testX_capitals_cat)

## AZ: variant without the "x_pos" feature -- swap in when it should not be loaded
# devX_dict = {"x":devX, "x_capital":devX_capitals_cat}
# testX_dict = {"x":testX, "x_capital":testX_capitals_cat}

# Options handed to retrieve_model / retrieve_decoder_model.
modelDir = dict(save_dir="result")

## Capsnet model

### no features

In [None]:
# Capsnet, no extra features: retrieve "glove_nolearn_base" via modelDir and
# score it on the test split.
# NOTE(review): testX_dict also carries the "x_pos"/"x_capital" inputs --
# confirm a model built without features accepts that dict.
modelName = "glove_nolearn_base"
myModel = retrieve_model(modelName, modelDir)
myModel.summary()

# Raw network output first, then the converted predictions used for scoring.
raw_y_pred = myModel.predict(testX_dict)
y_pred = eh.convert_raw_y_pred(raw_y_pred)

# Report object, linked to the data class.
report_obj4 = eh.EvalDev_Report(
    modelName=modelName,
    y_true=testY,
    raw_y_pred=raw_y_pred,
    y_pred=y_pred,
)
report_obj4.connect_to_dataClass(vocabData)

# Each metric is computed and printed in turn.
for metric_label, metric_fn in (
    ("Precision", report_obj4.get_precision),
    ("Recall\t", report_obj4.get_recall),
    ("F1\t", report_obj4.get_f1),
):
    print(metric_label, metric_fn(testY, y_pred))

cm = report_obj4.get_confusion_matrix(testY, y_pred)
print()
cm

### with features

In [None]:
# Capsnet with extra features: retrieve "glove_nolearn_features" via modelDir
# and score it on the test split.
modelName = "glove_nolearn_features"
myModel = retrieve_model(modelName, modelDir)
myModel.summary()

# Raw network output first, then the converted predictions used for scoring.
raw_y_pred = myModel.predict(testX_dict)
y_pred = eh.convert_raw_y_pred(raw_y_pred)

# Report object, linked to the data class.
report_kwargs = dict(modelName=modelName, y_true=testY,
                     raw_y_pred=raw_y_pred, y_pred=y_pred)
report_obj4 = eh.EvalDev_Report(**report_kwargs)
report_obj4.connect_to_dataClass(vocabData)

# Each metric is computed and printed in turn.
precision = report_obj4.get_precision(testY, y_pred)
print("Precision", precision)
recall = report_obj4.get_recall(testY, y_pred)
print("Recall\t", recall)
f1 = report_obj4.get_f1(testY, y_pred)
print("F1\t", f1)

cm = report_obj4.get_confusion_matrix(testY, y_pred)
print()
cm

### Model B (Model A + Decoder)

In [None]:
# # model B - Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps
# # modelName = "Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps"
# modelName = "glove_nolearn_base"
# myModel = retrieve_decoder_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred, raw_eval_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# #cm = report_obj4.get_confusion_matrix( testY, y_pred)
# #print()
# #cm

## CNN

### no features

In [None]:
# CNN, no extra features: retrieve "glove_nolearn_cnn_base" via modelDir and
# score it on the test split.
# NOTE(review): testX_dict also carries the "x_pos"/"x_capital" inputs --
# confirm a model built without features accepts that dict.
modelName = "glove_nolearn_cnn_base"
myModel = retrieve_model(modelName, modelDir)
myModel.summary()

# Raw network output first, then the converted predictions used for scoring.
raw_y_pred = myModel.predict(testX_dict)
y_pred = eh.convert_raw_y_pred(raw_y_pred)

# Report object, linked to the data class.
report_obj4 = eh.EvalDev_Report(
    modelName=modelName,
    y_true=testY,
    raw_y_pred=raw_y_pred,
    y_pred=y_pred,
)
report_obj4.connect_to_dataClass(vocabData)

# Each metric is computed and printed in turn.
for metric_label, metric_fn in (
    ("Precision", report_obj4.get_precision),
    ("Recall\t", report_obj4.get_recall),
    ("F1\t", report_obj4.get_f1),
):
    print(metric_label, metric_fn(testY, y_pred))

cm = report_obj4.get_confusion_matrix(testY, y_pred)
print()
cm

### with features

In [None]:
# CNN with extra features: retrieve "glove_nolearn_cnn_features" via modelDir
# and score it on the test split.
modelName = "glove_nolearn_cnn_features"
myModel = retrieve_model(modelName, modelDir)
myModel.summary()

# Raw network output first, then the converted predictions used for scoring.
raw_y_pred = myModel.predict(testX_dict)
y_pred = eh.convert_raw_y_pred(raw_y_pred)

# Report object, linked to the data class.
report_kwargs = dict(modelName=modelName, y_true=testY,
                     raw_y_pred=raw_y_pred, y_pred=y_pred)
report_obj4 = eh.EvalDev_Report(**report_kwargs)
report_obj4.connect_to_dataClass(vocabData)

# Each metric is computed and printed in turn.
precision = report_obj4.get_precision(testY, y_pred)
print("Precision", precision)
recall = report_obj4.get_recall(testY, y_pred)
print("Recall\t", recall)
f1 = report_obj4.get_f1(testY, y_pred)
print("F1\t", f1)

cm = report_obj4.get_confusion_matrix(testY, y_pred)
print()
cm