In [1]:
import os, sys
import numpy as np
import matplotlib.pyplot as plt
import pickle

import tensorflow as tf
from tensorflow.keras.models import load_model

In [2]:
## Make sure Google Drive is mounted first if the Demo/ folder is stored there.
## Replace YOUR_DRIVE_PATH with the real location and keep the trailing '/':
## the data and model paths are built from it by plain string concatenation.
workspace = 'YOUR_DRIVE_PATH/Demo/'
# Sanity check: prints True when the workspace path exists.
print(os.path.exists(workspace))

True


In [3]:
## Load data
current_data_dir = workspace + 'current_data/'


def _load_pickle(filename):
  """Unpickle and return the object stored in `current_data_dir + filename`.

  The file is opened in a `with` block so the handle is closed even when
  unpickling raises.
  """
  # NOTE(security): pickle.load can execute arbitrary code embedded in the
  # file; only load pickles that come from a trusted source.
  with open(current_data_dir + filename, "rb") as filehandler:
    return pickle.load(filehandler)


index_word = _load_pickle("index_word.pkl")   # token index -> token string lookup
x_test = _load_pickle("x_test.pkl")           # model inputs of the test examples
y_test = _load_pickle("y_test.pkl")           # ground-truth entity labels
nl_test = _load_pickle("nl_test.pkl")         # numeral labels

In [4]:
## Load the saved model from the workspace folder
model_path = workspace + 'current_model.h5'
model = load_model(model_path)
model.summary()   # print the layer-by-layer overview of the loaded model
# tf.keras.utils.plot_model(model)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, None, 128)         156672    
                                                                 
 bidirectional_2 (Bidirectio  (None, 256)              198144    
 nal)                                                            
                                                                 
 repeat_vector_1 (RepeatVect  (None, 50, 256)          0         
 or)                                                             
                                                                 
 bidirectional_3 (Bidirectio  (None, 50, 256)          296448    
 nal)                                                            
                                                                 
 time_distributed_2 (TimeDis  (None, 50, 512)          131584    
 tributed)                                            

In [5]:
def contract_numeral(chars):
  """Concatenate numeral tokens into a single string.

  The placeholder tokens '[neg]' and '[dot]' are rendered as '-' and '.';
  every other token is appended unchanged. An empty input yields ''.
  """
  placeholders = {'[neg]': '-', '[dot]': '.'}
  return ''.join(placeholders.get(token, token) for token in chars)

In [6]:
def proc_num_in_text(text, nl):
  """Collapse the numeral tokens of a token sequence into a single token.

  Args:
    text: 1-D sequence of token strings (numpy array or list).
    nl: numeral labels aligned with `text`; positions equal to 1 mark
      numeral tokens.

  Returns:
    A list made of the tokens before the first numeral position, the merged
    numeral string (built by `contract_numeral`), and the tokens after the
    last numeral position. When no position is labelled 1, the tokens are
    returned unchanged as a list.
  """
  # Coerce to arrays so plain lists work too: `nl == 1` on a list is a scalar
  # False and would not produce a usable mask.
  text = np.asarray(text)
  is_num = np.asarray(nl) == 1
  num_idx = np.flatnonzero(is_num)

  # Without any numeral there is nothing to merge; indexing num_idx[0] below
  # would raise IndexError.
  if num_idx.size == 0:
    return list(text)

  # Everything from the first to the last numeral position is replaced by the
  # merged numeral, so unlabelled tokens in between (if any) are dropped.
  text_before = text[:num_idx[0]]
  text_after = text[num_idx[-1] + 1:]
  numeral = contract_numeral(text[is_num])
  return list(text_before) + [numeral] + list(text_after)

In [7]:
def contract_words(words):
  """Return the given words as one string, separated by single spaces.

  An empty input yields ''.
  """
  return ' '.join(words)

In [8]:
## Try the model on one example from the test set (not used in training)
n = np.random.choice(len(x_test))   # randomly select an example

# Positions whose input value is non-zero; zeros are dropped everywhere below
# (presumably padding -- confirm against the preprocessing code).
sample = x_test[n]
keep = sample != 0

seq = sample[keep]       # sequence
nl = nl_test[n][keep]    # numeral label
el = y_test[n][keep]     # ground-truth entity label (1-unit, 2-target metric)
# print(el)

# Predicted entity label: run the model on a batch of one, take the argmax
# over the last axis, then keep the same positions as above.
scores = model(sample[None, ...])
el_pred = tf.argmax(scores, axis=-1)[0][keep].numpy()
# print(el_pred)

text = np.array([index_word[idx] for idx in seq])
print(f'Text #{n} is:', contract_words(proc_num_in_text(text, nl)))

num = text[nl == 1]
print('Num is:', contract_numeral(num))

print('---Ground-Truth---')
unit, targ = text[el == 1], text[el == 2]
print('Unit is:', contract_words(unit))
print('Targ is:', contract_words(targ))

print('---Prediction---')
unit_pred, targ_pred = text[el_pred == 1], text[el_pred == 2]
print('Unit is:', contract_words(unit_pred))
print('Targ is:', contract_words(targ_pred))

Text #141 is: median S100B concentrations at day 2.0 after intervention were lower in
Num is: 2.0
---Ground-Truth---
Unit is: 
Targ is: day
---Prediction---
Unit is: 
Targ is: day
