# Model Testing
> Final testing results for best models

In [1]:
%load_ext autoreload
%autoreload 2
from importlib import reload

import numpy as np
#import time # !
import json
#from matplotlib import pyplot as plt
import time

from loadutils import retrieve_decoder_model, retrieve_model
import evaluation_helper as eh
import tensorflow as tf
import pandas as pd
from keras.utils import to_categorical

import glove_helper
from loadutils import conll2003Data, saveProcessedData, retrieve_model
from common import vocabulary, utils


Using TensorFlow backend.


In [2]:
## AZ UPDATE 4/21/2020:  I switched the columns back in load_utils.py
# I also re-generated the data files, so results may differ from before

# Wall-clock reference point; the "Preprocessing Time" cell reports the time elapsed since here.
start_time = time.time()

# --- Language to test on (leave exactly one assignment active) ---
LANGUAGE = "es"
# LANGUAGE = "ca"
# LANGUAGE = "gd"

# --- How much of the training data to use: "100", "50" or "10" (percent) ---
# TRAIN_AMOUNT = "100"
# TRAIN_AMOUNT = "50"
TRAIN_AMOUNT = "10"

# Every corpus file of the selected language sits in one directory.
DIRECTORY = f"../data/pos_tagging/{LANGUAGE}/"

# Only the training file depends on TRAIN_AMOUNT; dev and test are fixed per language.
TRAIN_FILE = f"{DIRECTORY}train_{TRAIN_AMOUNT}.txt"
DEV_FILE = f"{DIRECTORY}dev.txt"
TEST_FILE = f"{DIRECTORY}test.txt"

# Archive with the pre-trained word vectors for the selected language.
# NOTE(review): unlike DIRECTORY this path has no leading "../" -- confirm that is intended.
# VECTORS = "data/"+LANGUAGE+"/wiki."+LANGUAGE+".zip"
VECTORS = f"data/{LANGUAGE}/cc.{LANGUAGE}.300.zip"

# # out files for IPC
# HYPER_PARAM_FILE = "hyper_params.json"

# VOCAB_SIZE = 20000

# Echo the active settings so the saved output records which run this was.
print("LANGUAGE:\t", LANGUAGE)
print("TRAIN_AMOUNT:\t", TRAIN_AMOUNT)

LANGUAGE:	 es
TRAIN_AMOUNT:	 100


## Local helper utils

In [3]:
def construct_embedding_matrix(embed_dim, vocab_size):
    """
    Build the embedding weight matrix from the pre-trained word vectors.

    Reuses the glove_helper code from w266. The vectors are read from the
    notebook-level VECTORS archive and the rows follow the word ids of the
    notebook-level vocabData.vocab, so both must be defined before calling.

    Args:
        embed_dim: length of each word vector (passed to glove_helper.Hands as ndim).
        vocab_size: number of rows of the returned matrix.

    Returns: an embedding matrix of shape (vocab_size, embed_dim), directly plugged
        into keras.layers.Embedding(weights=[embedding_matrix]). Rows whose id is
        not below vocabData.vocab.size are left as zeros.
    """
    reload(glove_helper)
    hands = glove_helper.Hands(vector_zip=VECTORS, ndim=embed_dim)
    embedding_matrix = np.zeros((vocab_size, embed_dim))

    for i in range(vocabData.vocab.size):
        word = vocabData.vocab.ids_to_words([i])[0]
        try:
            embedding_vector = hands.get_vector(word)
        except Exception:
            # Word has no pre-trained vector: fall back to the "<unk>" vector.
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still
            # interrupt this long-running loop.
            embedding_vector = hands.get_vector("<unk>")
        embedding_matrix[i] = embedding_vector

    return embedding_matrix

## Load the Data

In [4]:
# Vocabulary cap and context-window width shared by every data split
global_max_features = 20000
windowLength = 11
#testNumSents = 20000

# The vocabulary is built from the TRAINING file only
vocabData = conll2003Data(TRAIN_FILE)
vocabData.buildVocab(vocabSize=global_max_features, verbose=False)

# Training split -> windowed inputs (word ids, pos features, capitalization) and labels
trainX, trainX_pos, trainX_capitals, trainY = vocabData.formatWindowedData(
    vocabData.train_sentences, windowLength=windowLength, verbose=False
)

# Dev split: read the file, then window it the same way
devSents = vocabData.readFile(DEV_FILE)
devX, devX_pos, devX_capitals, devY = vocabData.formatWindowedData(
    devSents, windowLength=windowLength, verbose=False
)

# Test split: read the file, then window it the same way
testSents = vocabData.readFile(TEST_FILE)
testX, testX_pos, testX_capitals, testY = vocabData.formatWindowedData(
    testSents, windowLength=windowLength, verbose=False
)

----------------------------------------------------
reading file from path ../data/pos_tagging/es/train_100.txt
'readFile'  2208.14 ms
----------------------------------------------------
building vocabulary from TRAINING data...
'buildVocab'  2001.08 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  5040.96 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/dev.txt
'readFile'  198.66 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  753.37 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/test.txt
'readFile'  208.65 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  411.14 ms


In [5]:
# Build the embedding matrix from the pre-trained vectors referenced by VECTORS

# global_embed_dim = 50
global_embed_dim = 300

embedding_matrix = construct_embedding_matrix(
    embed_dim=global_embed_dim,
    vocab_size=global_max_features,
)

Loading vectors from data/es/cc.es.300.zip
Parsing file: data/es/cc.es.300.zip:cc.es.300.vec
Found 2,000,000 words.
Parsing vectors... Done! (W.shape = (2000003, 300))


In [6]:
# One-hot encode the label ids (Y)

# The number of classes is taken from the training labels, and dev/test are
# encoded with that same width so all three label arrays line up column-wise.
trainY_cat = to_categorical(trainY.astype('float32'))
devY_cat, testY_cat = (
    to_categorical(split_labels.astype('float32'), num_classes=trainY_cat.shape[1])
    for split_labels in (devY, testY)
)



In [7]:
# Drop the first three label columns and make the arrays float64.
# (presumably ids 0-2 are reserved, non-label entries of the tag vocabulary -- TODO confirm)
# `np.float` was only an alias of the builtin `float` and is gone in NumPy >= 1.24,
# so the builtin is used; a plain slice replaces the per-row map/lambda rebuild.
# NOTE: not idempotent -- re-running this cell removes three more columns.
trainY_cat = trainY_cat[:, 3:].astype(float)
devY_cat = devY_cat[:, 3:].astype(float)
testY_cat = testY_cat[:, 3:].astype(float)

In [8]:
# Get decoder Y -- the embedding vector of each window's center word.
# The center column is derived from windowLength instead of the previous
# hard-coded 4, which is only the center of a 9-wide window (windowLength is 11).
center_index = windowLength // 2
train_decoderY = embedding_matrix[trainX[:, center_index]]
dev_decoderY = embedding_matrix[devX[:, center_index]]
test_decoderY = embedding_matrix[testX[:, center_index]]

In [9]:
# AZ's take on keras's to_categorical method, but if we DON'T want a one-hot vector
# instead, the values of the vectors are 1 for ALL of the features present, so one token's vector
# could have a 1 in more than one dimension here

def to_almost_categorical(y, num_classes):
    """
    Multi-hot ("almost categorical") encoding of per-token feature ids.

    Unlike keras' to_categorical, a token may carry several ids at once: each
    id found for a token adds 1 to that id's slot, so a token's vector can be
    non-zero in more than one dimension (and above 1 if an id repeats).

    Args:
        y: array indexed as y[window][token] -> iterable of integer-valued ids;
           must expose .shape with at least two dimensions.
        num_classes: size of the last axis of the result; every id must be
           smaller than this.

    Returns:
        float array of shape (y.shape[0], y.shape[1], num_classes).
    """
    almost_categorical_array = np.zeros((y.shape[0], y.shape[1], num_classes))
    # Walk over EVERY window. The earlier version looped over len(y[0]) (the
    # window width), which left all windows past that index as zeros and
    # failed outright when there were fewer windows than window positions.
    for window_index, window in enumerate(y):
        for token_index, token in enumerate(window):
            for value in token:
                almost_categorical_array[window_index, token_index, int(value)] += 1
    return almost_categorical_array

In [10]:
# Get X pos tags

# Multi-hot encoding (to_almost_categorical, not a strict one-hot): one slot
# per entry of vocabData.posTags for every token of every window.
trainX_pos_cat, devX_pos_cat, testX_pos_cat = (
    to_almost_categorical(split_pos, num_classes=vocabData.posTags.size)
    for split_pos in (trainX_pos, devX_pos, testX_pos)
)

[[[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]
[[[1. 0. 0

In [11]:
# Drop the first three feature columns of every token and make the arrays float64.
# (presumably ids 0-2 are reserved, non-tag entries of the tag vocabulary -- TODO confirm)
# `np.float` was only an alias of the builtin `float` and is gone in NumPy >= 1.24,
# so the builtin is used; a plain slice replaces the per-window map/lambda rebuild.
# NOTE: not idempotent -- re-running this cell removes three more columns.
trainX_pos_cat = trainX_pos_cat[:, :, 3:].astype(float)
devX_pos_cat = devX_pos_cat[:, :, 3:].astype(float)
testX_pos_cat = testX_pos_cat[:, :, 3:].astype(float)

In [12]:
# Get X capitalization

# encoding 1-hot for capitalization info  ("allCaps", "upperInitial", "lowercase", "mixedCaps", "noinfo")
# The class count comes from the training split so dev/test get the same width.
trainX_capitals_cat = to_categorical(trainX_capitals.astype('float32'))
devX_capitals_cat = to_categorical(devX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])
testX_capitals_cat = to_categorical(testX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])

# Drop the first three columns of every token and make the arrays float64.
# (presumably ids 0-2 are reserved, non-category entries -- TODO confirm)
# `np.float` was only an alias of the builtin `float` and is gone in NumPy >= 1.24,
# so the builtin is used; a plain slice replaces the per-window map/lambda rebuild.
trainX_capitals_cat = trainX_capitals_cat[:, :, 3:].astype(float)
devX_capitals_cat = devX_capitals_cat[:, :, 3:].astype(float)
testX_capitals_cat = testX_capitals_cat[:, :, 3:].astype(float)

## Final Models

In [13]:
## AZ: if we want to use features
# Model inputs keyed by name: word ids plus the encoded pos and capitalization features
devX_dict = {
    "x": devX,
    "x_pos": devX_pos_cat,
    "x_capital": devX_capitals_cat,
}
testX_dict = {
    "x": testX,
    "x_pos": testX_pos_cat,
    "x_capital": testX_capitals_cat,
}

## AZ: if we don't want to load the features ("x_pos" here)
# devX_dict = {"x":devX, "x_capital":devX_capitals_cat}
# testX_dict = {"x":testX, "x_capital":testX_capitals_cat}

# Passed to retrieve_model / retrieve_decoder_model together with the model name
modelDir = {"save_dir": "result"}

In [14]:
# Wall-clock time elapsed since start_time was set at the top of the configuration cell
print("Preprocessing Time")
print("--- %s seconds ---" % (time.time() - start_time))

Preprocessing Time
--- 207.08925342559814 seconds ---


## Capsnet model

### no features

In [16]:
# Timer for this evaluation; the elapsed time printed below covers model retrieval,
# summary, prediction and the three metric calls, but not the confusion matrix.
start_time_caps_1 = time.time()

# Capsnet "glove learn" base model for the selected LANGUAGE / TRAIN_AMOUNT
modelName =  "glove_learn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_base"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

# Predict on the test inputs and convert the raw model output via the evaluation helper
raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

print("Testing Time for caps glove learn")
print("--- %s seconds ---" % (time.time() - start_time_caps_1))

# NOTE(review): the saved output of this cell ends with
# "ValueError: Shape of passed values is (17, 17), indices imply (18, 18)".
# The truncated output does not show which call raised it -- presumably
# get_confusion_matrix (label count mismatch); confirm and fix in evaluation_helper.
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
# Bare last expression so the notebook renders the confusion matrix
cm



Retrieving model: glove_learn_es_100_base
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)       (None

ValueError: Shape of passed values is (17, 17), indices imply (18, 18)

In [17]:
# Timer for this evaluation; the elapsed time printed below covers model retrieval,
# summary, prediction and the three metric calls, but not the confusion matrix.
start_time_caps_2 = time.time()

# Capsnet "glove no learn" base model for the selected LANGUAGE / TRAIN_AMOUNT
modelName =  "glove_nolearn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_base"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

# Predict on the test inputs and convert the raw model output via the evaluation helper
raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

print("Testing Time for caps glove no learn")
print("--- %s seconds ---" % (time.time() - start_time_caps_2))

# NOTE(review): the saved output of this cell ends with
# "ValueError: Shape of passed values is (17, 17), indices imply (18, 18)".
# The truncated output does not show which call raised it -- presumably
# get_confusion_matrix (label count mismatch); confirm and fix in evaluation_helper.
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
# Bare last expression so the notebook renders the confusion matrix
cm



Retrieving model: glove_nolearn_es_100_base
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)       (No

ValueError: Shape of passed values is (17, 17), indices imply (18, 18)

### with features

In [None]:
# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
# modelName = "glove_nolearn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_features"
# myModel = retrieve_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# cm = report_obj4.get_confusion_matrix( testY, y_pred)
# print()
# cm

### Model B (Model A + Decoder)

In [None]:
# # model B - Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps
# # modelName = "Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps"
# modelName = "glove_nolearn_base"
# myModel = retrieve_decoder_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred, raw_eval_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# #cm = report_obj4.get_confusion_matrix( testY, y_pred)
# #print()
# #cm

## CNN

### no features

In [18]:
# Timer for this evaluation; the elapsed time printed below covers model retrieval,
# summary, prediction and the three metric calls, but not the confusion matrix.
start_time_cnn_1 = time.time()

# CNN "glove learn" base model for the selected LANGUAGE / TRAIN_AMOUNT
modelName = "glove_learn_cnn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_base"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

# Predict on the test inputs and convert the raw model output via the evaluation helper
raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

print("Testing Time for cnn glove learn")
print("--- %s seconds ---" % (time.time() - start_time_cnn_1))

# NOTE(review): the saved output of this cell ends with
# "ValueError: Shape of passed values is (17, 17), indices imply (18, 18)".
# The truncated output does not show which call raised it -- presumably
# get_confusion_matrix (label count mismatch); confirm and fix in evaluation_helper.
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
# Bare last expression so the notebook renders the confusion matrix
cm



W0526 11:21:48.604022 140139768379200 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0526 11:21:48.606286 140139768379200 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:131: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0526 11:21:48.606808 140139768379200 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0526 11:21:48.615356 140139768379200 deprecation.py:506] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be rem

Retrieving model: glove_learn_cnn_es_100_base
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          (

ValueError: Shape of passed values is (17, 17), indices imply (18, 18)

In [19]:
# Timer for this evaluation; the elapsed time printed below covers model retrieval,
# summary, prediction and the three metric calls, but not the confusion matrix.
start_time_cnn_2 = time.time()

# CNN "glove no learn" base model for the selected LANGUAGE / TRAIN_AMOUNT
modelName = "glove_nolearn_cnn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_base"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

# Predict on the test inputs and convert the raw model output via the evaluation helper
raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

print("Testing Time for cnn glove no learn")
print("--- %s seconds ---" % (time.time() - start_time_cnn_2))

# NOTE(review): the saved output of this cell ends with
# "ValueError: Shape of passed values is (17, 17), indices imply (18, 18)".
# The truncated output does not show which call raised it -- presumably
# get_confusion_matrix (label count mismatch); confirm and fix in evaluation_helper.
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
# Bare last expression so the notebook renders the confusion matrix
cm



Retrieving model: glove_nolearn_cnn_es_100_base
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)         

ValueError: Shape of passed values is (17, 17), indices imply (18, 18)

### with features

In [None]:
# # CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
# modelName = "glove_nolearn_cnn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_features"
# myModel = retrieve_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# cm = report_obj4.get_confusion_matrix( testY, y_pred)
# print()
# cm