# Model Testing
> Final testing results for best models

In [1]:
%load_ext autoreload
%autoreload 2
from importlib import reload

import numpy as np
#import time # !
import json
#from matplotlib import pyplot as plt

from loadutils import retrieve_decoder_model, retrieve_model
import evaluation_helper as eh
import tensorflow as tf
import pandas as pd
from keras.utils import to_categorical

import glove_helper
from loadutils import conll2003Data, saveProcessedData, retrieve_model
from common import vocabulary, utils


Using TensorFlow backend.


In [2]:
## AZ UPDATE 4/21/2020:  I switched the columns back in load_utils.py
# I also re-generated the data files, so results may differ from before

"""
Pick which language to test on
"""
# LANGUAGE = "es"
LANGUAGE = "ca"

"""
Pick how much of the training data to use
"""
# TRAIN_AMOUNT = "100"
# TRAIN_AMOUNT = "50"
TRAIN_AMOUNT = "10"

DIRECTORY = "../data/pos_tagging/"+LANGUAGE+"/"

#training file depends on low-resource or not; 100%, 50%, or 10% of training data used
TRAIN_FILE = DIRECTORY+"train_"+TRAIN_AMOUNT+".txt"

# dev, test, and vectors stay the same
DEV_FILE = DIRECTORY+"dev.txt"
TEST_FILE = DIRECTORY+"test.txt"
VECTORS = "data/"+LANGUAGE+"/wiki."+LANGUAGE+".zip"

# # out files for IPC
# HYPER_PARAM_FILE = "hyper_params.json"

# VOCAB_SIZE = 20000

## PRINT OUT HYPERPARAMETERS FOR REFERENCE
print("LANGUAGE:\t", LANGUAGE)
print("TRAIN_AMOUNT:\t", TRAIN_AMOUNT)

LANGUAGE:	 ca
TRAIN_AMOUNT:	 10


## Local helper utils

In [3]:
def construct_embedding_matrix(embed_dim, vocab_size):
    """
    construct embedding matrix from GloVe 6Bn word data
    
    reuse glove_helper code from w266 
    
    Returns: an embedding matrix directly plugged into keras.layers.Embedding(weights=[embedding_matrix])
    """
    reload(glove_helper)
    hands = glove_helper.Hands(vector_zip=VECTORS, ndim=embed_dim)
    embedding_matrix = np.zeros((vocab_size, embed_dim))
    
    for i in range(vocabData.vocab.size):
        word = vocabData.vocab.ids_to_words([i])[0]
        try:
            embedding_vector = hands.get_vector(word)
        except:
            embedding_vector = hands.get_vector("<unk>")
        embedding_matrix[i] = embedding_vector

    return embedding_matrix

## Load the Data

In [4]:
# UPDATES!
global_max_features = 20000
windowLength = 11
#testNumSents = 20000

# Use training set to build vocab here
vocabData = conll2003Data(TRAIN_FILE)
vocabData.buildVocab( vocabSize=global_max_features, verbose=True)

# Format training data
trainX, trainX_pos, trainX_capitals, trainY  = vocabData.formatWindowedData( 
                                                  vocabData.train_sentences, 
                                                  windowLength=windowLength,
                                                  verbose=False)

# read in dev data
devSents = vocabData.readFile( DEV_FILE)
devX, devX_pos, devX_capitals, devY = vocabData.formatWindowedData( 
                                              devSents, 
                                              windowLength=windowLength,
                                              verbose=False)

# read in the test data
testSents = vocabData.readFile( TEST_FILE)
testX, testX_pos, testX_capitals, testY = vocabData.formatWindowedData( 
                                                testSents, 
                                                windowLength=windowLength,
                                                verbose=False)

----------------------------------------------------
reading file from path ../data/pos_tagging/ca/train_10.txt
'readFile'  239.80 ms
----------------------------------------------------
building vocabulary from TRAINING data...
vocabulary for words, posTags, nerTags built and stored in object
vocab size = 20000
10 sampled words from vocabulary
 ['manifesten', 'recomanació', 'estrangeria', 'partits', 'amsterdam', 'violenta', 'least', 'efectius', 'annan', 'inèdits'] 

number of unique pos Tags in training = 63
all posTags used
 ['Person=1', 'AdpType=Prep', 'PunctSide=Ini', 'PrepCase=Npr', 'Person=2', 'NumType=Frac', 'Case=Dat', 'AdvType=Tim', 'Mood=Sub', 'PunctType=Qest', 'Mood=Imp', 'PunctType=Dash', 'Polite=Form', 'NumType=Ord', 'VerbForm=Part', 'Degree=Cmp', 'PronType=Int', 'Tense=Fut', 'PunctType=Brck', 'AdpType=Preppron', 'PronType=Prs', 'Definite=Def', 'PunctType=Quot', 'Polarity=Neg', 'Case=Acc,Nom', 'Poss=Yes', '_', 'Mood=Cnd', 'PunctType=Peri', 'Tense=Pres', 'NumForm=Digit', 'P

In [5]:
# Load GloVe embedding matrix

# global_embed_dim = 50
global_embed_dim = 300

embedding_matrix = construct_embedding_matrix( global_embed_dim, 
                                               global_max_features)

Loading vectors from data/ca/wiki.ca.zip
Parsing file: data/ca/wiki.ca.zip:wiki.ca.vec
Found 490,566 words.
Parsing vectors... Done! (W.shape = (490569, 300))


In [6]:
# Get Y

# cat train/dev/test to make sure we have all labels in test set

# encoding 1-hot for ner targets
trainY_cat = to_categorical(trainY.astype('float32'))
devY_cat = to_categorical(devY.astype('float32'), num_classes=trainY_cat.shape[1])
testY_cat = to_categorical(testY.astype('float32'), num_classes=trainY_cat.shape[1])



In [7]:
trainY_cat = np.array(list(map( lambda i: np.array(i[3:], dtype=np.float), trainY_cat)), dtype=np.float)
devY_cat = np.array(list(map( lambda i: np.array(i[3:], dtype=np.float), devY_cat)), dtype=np.float)
testY_cat = np.array(list(map( lambda i: np.array(i[3:], dtype=np.float), testY_cat)), dtype=np.float)

In [8]:
# Get decoder Y -- 50 dim embedding of center word
train_decoderY = embedding_matrix[trainX[:,4]]
dev_decoderY = embedding_matrix[devX[:,4]]
test_decoderY = embedding_matrix[testX[:,4]]

In [9]:
# AZ's take on keras's to_categorical method, but if we DON'T want a one-hot vector
# instead, the values of the vectors are 1 for ALL of the features present, so one token's vector
# could have a 1 in more than one dimension here

def to_almost_categorical(y, num_classes):
    almost_categorical_array = np.zeros((y.shape[0], y.shape[1], num_classes))
#     print(almost_categorical_array.shape)
    for i in range(len(y[0])):
#         print("window:\t",window)
        window = y[i]
        window_index = i
#         print("window_index:\t", i)
#         print("window:\t", window)
        for j in range(len(window)):
            token = window[j]
            token_index = j
#             print("token_index:\t", token_index)
#             print("token:\t",token)
            for value in token:
#                 print("value:\t",value)
#                 print("token_index:\t",token_index)
                value = int(value)
                almost_categorical_array[i,token_index,value] += 1
#                 print(almost_categorical_array[i])
    print(almost_categorical_array)
    return almost_categorical_array

In [10]:
# Get X pos tags

# encoding 1-hot for pos tags
trainX_pos_cat = to_almost_categorical(trainX_pos, num_classes=vocabData.posTags.size)
devX_pos_cat = to_almost_categorical(devX_pos, num_classes=vocabData.posTags.size) 
testX_pos_cat = to_almost_categorical(testX_pos, num_classes=vocabData.posTags.size)

[[[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]
[[[1. 0. 0

In [11]:
trainX_pos_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), trainX_pos_cat)), dtype=np.float)
devX_pos_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), devX_pos_cat)), dtype=np.float)
testX_pos_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), testX_pos_cat)), dtype=np.float)

In [12]:
# Get X capitalization 

# encoding 1-hot for capitalization info  ("allCaps", "upperInitial", "lowercase", "mixedCaps", "noinfo")
trainX_capitals_cat = to_categorical(trainX_capitals.astype('float32'))
devX_capitals_cat = to_categorical(devX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2]) 
testX_capitals_cat = to_categorical(testX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])

trainX_capitals_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), trainX_capitals_cat)), dtype=np.float)
devX_capitals_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), devX_capitals_cat)), dtype=np.float)
testX_capitals_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), testX_capitals_cat)), dtype=np.float)

## Final Models

In [13]:
## AZ: if we want to use features
devX_dict = {"x":devX, "x_pos":devX_pos_cat, "x_capital":devX_capitals_cat}
testX_dict = {"x":testX, "x_pos":testX_pos_cat, "x_capital":testX_capitals_cat}

## AZ: if we don't want to load the features ("x_pos" here)
# devX_dict = {"x":devX, "x_capital":devX_capitals_cat}
# testX_dict = {"x":testX, "x_capital":testX_capitals_cat}

modelDir = { "save_dir" : "result"}

## Capsnet model

### no features

In [14]:
# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "glove_nolearn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_base"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
cm

W0505 13:21:38.540382 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0505 13:21:38.543392 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0505 13:21:38.657084 140267461789504 deprecation.py:506] From /home/andrew/Documents/CapsuleNetworks/CapsNet_for_NER_zupon/code/capsulelayers.py:136: calling softmax (from tensorflow.python.ops.nn_ops) with dim is deprecated and will be removed in a future version.
Instructions for updating:
dim is deprecated, use axis instead


Retrieving model: glove_nolearn_ca_10_base


W0505 13:21:38.763386 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0505 13:21:38.764033 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W0505 13:21:38.764779 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:186: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

W0505 13:21:38.793290 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

W0505 13:21:38.794171 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           6000000   
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)       (None, 17, 16)            487424    
__________

  gold_idx_dict[gold] = np.hstack(self.gold_pred_idx_dict[gold].values())
W0505 13:22:02.411960 140267461789504 module_wrapper.py:139] From /home/andrew/Documents/CapsuleNetworks/CapsNet_for_NER_zupon/code/evaluation_helper.py:308: The name tf.confusion_matrix is deprecated. Please use tf.math.confusion_matrix instead.



Precision 0.8614329980050087
Recall	 0.8572303533337557
F1	 0.8593265373093248



Unnamed: 0,NOUN,ADP,DET,PUNCT,PROPN,VERB,ADJ,PRON,AUX,ADV,CCONJ,SCONJ,NUM,SYM,_,PART,INTJ
NOUN,9091,43,48,12,1067,50,162,26,12,80,4,6,66,0,1,0,0
ADP,14,9337,45,3,123,14,26,2,1,27,2,1,10,0,2,0,0
DET,45,58,7568,0,208,7,20,12,1,38,0,3,7,2,0,0,0
PUNCT,2,32,4,5705,86,0,14,2,0,0,0,0,1,7,2,0,0
PROPN,1015,56,35,3,4109,12,248,50,4,5,0,4,23,2,1,0,0
VERB,126,227,69,8,424,3117,117,31,104,9,1,0,90,15,2,0,0
ADJ,269,84,106,13,520,31,2132,27,1,13,0,1,58,0,2,0,0
PRON,67,46,179,2,79,26,21,1841,3,77,0,133,7,6,0,0,0
AUX,3,2,0,0,8,58,2,2,2251,5,0,0,2,0,0,0,0
ADV,45,18,12,0,39,10,50,14,0,1382,7,9,8,1,0,0,0


### with features

In [15]:
# model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName = "glove_nolearn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_features"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
cm

Retrieving model: glove_nolearn_ca_10_features
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
x (InputLayer)                  (None, 11)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 11, 300)      6000000     x[0][0]                          
__________________________________________________________________________________________________
x_pos (InputLayer)              (None, 11, 60)       0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 11, 360)      0           embedding_1[0][0]                
                                                              

  gold_idx_dict[gold] = np.hstack(self.gold_pred_idx_dict[gold].values())


Unnamed: 0,NOUN,ADP,DET,PUNCT,PROPN,VERB,ADJ,PRON,AUX,ADV,CCONJ,SCONJ,NUM,SYM,_,PART,INTJ
NOUN,8662,134,78,3,1261,92,108,31,19,60,5,10,199,4,2,0,0
ADP,5,9483,34,0,55,15,1,0,0,6,0,0,8,0,0,0,0
DET,32,150,7537,0,123,24,17,20,2,28,0,4,32,0,0,0,0
PUNCT,3,75,2,5695,66,1,7,2,1,1,0,0,1,0,1,0,0
PROPN,958,148,119,0,3902,24,268,68,4,3,0,2,63,3,5,0,0
VERB,68,560,47,0,210,3214,41,25,65,4,0,0,87,14,5,0,0
ADJ,262,351,117,0,423,100,1829,37,0,14,0,1,117,5,1,0,0
PRON,56,69,179,0,56,41,20,1885,3,48,0,119,6,5,0,0,0
AUX,2,11,0,0,3,102,0,2,2211,0,0,0,2,0,0,0,0
ADV,51,80,13,0,27,34,40,23,3,1285,7,17,14,1,0,0,0


### Model B (Model A + Decoder)

In [16]:
# # model B - Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps
# # modelName = "Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps"
# modelName = "glove_nolearn_base"
# myModel = retrieve_decoder_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred, raw_eval_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# #cm = report_obj4.get_confusion_matrix( testY, y_pred)
# #print()
# #cm

## CNN

### no features

In [17]:
# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "glove_nolearn_cnn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_base"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
cm

W0505 13:22:27.213852 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0505 13:22:27.215746 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:131: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0505 13:22:27.216399 140267461789504 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0505 13:22:27.221460 140267461789504 deprecation.py:506] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be rem

Retrieving model: glove_nolearn_cnn_ca_10_base
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           6000000   
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          

  gold_idx_dict[gold] = np.hstack(self.gold_pred_idx_dict[gold].values())


Unnamed: 0,NOUN,ADP,DET,PUNCT,PROPN,VERB,ADJ,PRON,AUX,ADV,CCONJ,SCONJ,NUM,SYM,_,PART,INTJ
NOUN,9068,261,242,35,632,66,207,60,6,24,11,28,28,0,0,0,0
ADP,121,9007,172,8,113,54,28,16,8,11,11,22,36,0,0,0,0
DET,152,149,7424,2,84,18,71,14,0,1,0,6,48,0,0,0,0
PUNCT,36,56,24,5565,85,9,26,6,0,20,16,11,1,0,0,0,0
PROPN,1162,215,173,69,3582,19,195,99,2,20,7,11,11,2,0,0,0
VERB,321,296,214,12,55,3050,116,66,53,45,1,96,13,2,0,0,0
ADJ,927,296,136,22,265,74,1405,27,1,47,23,13,18,3,0,0,0
PRON,223,59,260,3,52,96,33,1518,2,105,5,129,2,0,0,0,0
AUX,12,12,5,0,3,599,17,4,1665,5,0,11,0,0,0,0,0
ADV,357,85,63,19,46,137,202,353,2,205,24,99,3,0,0,0,0


### with features

In [18]:
# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "glove_nolearn_cnn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_features"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))
cm = report_obj4.get_confusion_matrix( testY, y_pred)
print()
cm

Retrieving model: glove_nolearn_cnn_ca_10_features
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
x (InputLayer)                  (None, 11)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 11, 300)      6000000     x[0][0]                          
__________________________________________________________________________________________________
x_pos (InputLayer)              (None, 11, 60)       0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 11, 360)      0           embedding_1[0][0]                
                                                          

  gold_idx_dict[gold] = np.hstack(self.gold_pred_idx_dict[gold].values())


Precision 0.77781694795752
Recall	 0.7455701281970052
F1	 0.7613522402544886



Unnamed: 0,NOUN,ADP,DET,PUNCT,PROPN,VERB,ADJ,PRON,AUX,ADV,CCONJ,SCONJ,NUM,SYM,_,PART,INTJ
NOUN,9041,191,211,35,708,141,152,91,12,35,20,27,4,0,0,0,0
ADP,174,9100,112,8,122,26,14,12,16,4,9,6,4,0,0,0,0
DET,214,125,7302,9,142,37,55,36,0,5,26,17,1,0,0,0,0
PUNCT,23,50,12,5603,115,5,11,4,2,8,18,4,0,0,0,0,0
PROPN,1094,182,105,98,3770,20,160,85,6,24,14,8,0,1,0,0,0
VERB,236,408,145,21,108,3009,51,72,155,42,15,75,3,0,0,0,0
ADJ,829,310,172,22,359,186,1190,59,18,55,36,15,5,1,0,0,0
PRON,232,58,188,14,43,104,7,1613,14,31,29,154,0,0,0,0,0
AUX,6,22,1,0,4,333,2,5,1956,2,1,1,0,0,0,0,0
ADV,300,127,46,36,52,178,85,395,8,124,97,147,0,0,0,0,0
