# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Checkpoint" data-toc-modified-id="Checkpoint-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Checkpoint</a></div><div class="lev1 toc-item"><a href="#Build-Model" data-toc-modified-id="Build-Model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Build Model</a></div><div class="lev2 toc-item"><a href="#Set-Hyperparameters" data-toc-modified-id="Set-Hyperparameters-21"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Set Hyperparameters</a></div><div class="lev2 toc-item"><a href="#Import-Libraries" data-toc-modified-id="Import-Libraries-22"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Import Libraries</a></div><div class="lev2 toc-item"><a href="#Build-Graph" data-toc-modified-id="Build-Graph-23"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Build Graph</a></div><div class="lev2 toc-item"><a href="#Model-Visualization" data-toc-modified-id="Model-Visualization-24"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Model Visualization</a></div><div class="lev2 toc-item"><a href="#Train" data-toc-modified-id="Train-25"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Train</a></div><div class="lev1 toc-item"><a href="#Evaluate" data-toc-modified-id="Evaluate-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Evaluate</a></div><div class="lev2 toc-item"><a href="#Naive-Evaluate" data-toc-modified-id="Naive-Evaluate-31"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Naive Evaluate</a></div><div class="lev2 toc-item"><a href="#Greedy-1-best-Search-Evaluate" data-toc-modified-id="Greedy-1-best-Search-Evaluate-32"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Greedy 1-best Search Evaluate</a></div><div class="lev2 toc-item"><a href="#Beam-Search-Evaluate" data-toc-modified-id="Beam-Search-Evaluate-33"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Beam Search Evaluate</a></div>

# Checkpoint

In [1]:
import h5py
import pickle

In [2]:
with h5py.File('allData.h5', 'r') as fh:
    x_train = fh['x_train'][:]
    tf_train = fh['tf_train'][:]
    y_train = fh['y_train'][:]
    x_val = fh['x_val'][:]
    tf_val = fh['tf_val'][:]
    y_val = fh['y_val'][:]
    x_train_all = fh['x_train_all'][:]
    tf_train_all = fh['tf_train_all'][:]
    y_train_all = fh['y_train_all'][:]
    x_test = fh['x_test'][:]
    tf_test = fh['tf_test'][:]
    y_test = fh['y_test'][:]
    embedding = fh['embedding'][:]

In [3]:
with open('index.pkl', 'rb') as fp:
    word2index, index2word = pickle.load(fp)

# Build Model

##  Set Hyperparameters

In [4]:
MAX_SENT_LEN = 64
MAX_ADJL_LEN = 3
VOCAB_SIZE = len(word2index)+1
NUM_CLASSES = VOCAB_SIZE
EMBEDDING_SIZE = 300
TF_EMBEDDING_SIZE = 300

ENC_1_RNN_SIZE = 300
ENC_2_RNN_SIZE = 150
DEC_RNN_SIZE = 150
DROPOUT_RATE = 0.5
RNN_DROPOUT_RATE = 0.2
NUM_EPOCHS = 512
BATCH_SIZE = 79
STEPS_PER_EPOCH = 20
TEST_STEPS = len(x_test)//BATCH_SIZE

VALIDATION_STEPS = 3

## Import Libraries

In [5]:
from keras.layers import Input, Embedding, Dropout, Bidirectional, LSTM, RepeatVector, concatenate, TimeDistributed, Dense
from keras.models import Model
import keras.backend as K
from keras.callbacks import*
from keras.utils import to_categorical

Using TensorFlow backend.


## Build Graph

In [8]:
K.clear_session()
sequence = Input(shape=(MAX_SENT_LEN,), name='INPUT') 
emb_seq = Embedding(VOCAB_SIZE, EMBEDDING_SIZE, weights=[embedding], mask_zero=True, input_length=MAX_SENT_LEN, trainable=False, name='EMBEDDING')(sequence)
tf_seq = Input(shape=(MAX_ADJL_LEN,), name='TF_INPUT')
tf_emb = Embedding(NUM_CLASSES, TF_EMBEDDING_SIZE, weights=[embedding], mask_zero=True, input_length=MAX_ADJL_LEN, trainable=False, name='TF_EMBEDDING')(tf_seq)
blstm = Bidirectional(LSTM(ENC_1_RNN_SIZE, return_sequences=True, implementation=0, dropout=RNN_DROPOUT_RATE, recurrent_dropout=RNN_DROPOUT_RATE), merge_mode='concat', name='ENC_BLSTM_1')(emb_seq)
blstm = Dropout(DROPOUT_RATE)(blstm)
blstm = Bidirectional(LSTM(ENC_2_RNN_SIZE, return_sequences=False, implementation=0, dropout=RNN_DROPOUT_RATE, recurrent_dropout=RNN_DROPOUT_RATE), merge_mode='concat', name='ENC_BLSTM_2')(blstm)
blstm = Dropout(DROPOUT_RATE)(blstm)
context = RepeatVector(MAX_ADJL_LEN, name='CONTEXT')(blstm)
context = concatenate([context, tf_emb], axis=-1)
blstm = Bidirectional(LSTM(DEC_RNN_SIZE, return_sequences=True, implementation=0, dropout=RNN_DROPOUT_RATE, recurrent_dropout=RNN_DROPOUT_RATE, name='DEC_LSTM'), merge_mode='concat', name='DEC_BLSTM')(context)
lstm = Dropout(DROPOUT_RATE)(blstm)
output = TimeDistributed(Dense(NUM_CLASSES, activation='softmax'), name='OUTPUT')(lstm)
model = Model(inputs=[sequence, tf_seq], outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

## Model Visualization

In [9]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
INPUT (InputLayer)               (None, 64)            0                                            
____________________________________________________________________________________________________
EMBEDDING (Embedding)            (None, 64, 300)       17652300    INPUT[0][0]                      
____________________________________________________________________________________________________
ENC_BLSTM_1 (Bidirectional)      (None, 64, 600)       1442400     EMBEDDING[0][0]                  
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 64, 600)       0           ENC_BLSTM_1[0][0]                
___________________________________________________________________________________________

## Train

In [10]:
def gen_label(s):
    """
    One-hot encoding
    """
    gen = to_categorical(s, num_classes=NUM_CLASSES)
    return gen

def data_generator_all(data, label, batch_size):
    """
    Yield batches of all data
    """
    count = 0
    while True:
        if count >= len(data[0]): 
            count = 0
        x_1 = np.zeros((batch_size, MAX_SENT_LEN))
        x_2 = np.zeros((batch_size, MAX_ADJL_LEN))
        y = np.zeros((batch_size, MAX_ADJL_LEN, NUM_CLASSES))
        for i in range(batch_size):
            n = i + count
            if n > len(data[0])-1:
                break
            x_1[i, :] = data[0][n]
            x_2[i, :] = data[1][n]
            y[i, :, :] = gen_label(label[n])
        count += batch_size
        yield ([x_1, x_2], y)
        
def data_generator(data, label, batch_size): 
    """
    Yield batches 
    """
    index = np.arange(len(data[0]))
    np.random.shuffle(index)    
    batches = [index[range(batch_size*i, min(len(data[0]), batch_size*(i+1)))] for i in range(len(data[0])//batch_size)]
    while True:
        for i in batches:
            x_1 = data[0][i]
            x_2 = data[1][i]
            y = np.array(list(map(gen_label, label[i])))
            yield ([x_1, x_2], y)

In [11]:
gen_train_all = data_generator([x_train_all, tf_train_all], y_train_all, BATCH_SIZE)
gen_test = data_generator_all([x_test, tf_test], y_test, BATCH_SIZE)
#gen_train = data_generator(x_train, y_train, BATCH_SIZE)
#gen_val = data_generator(x_val, y_val, BATCH_SIZE)

In [12]:
# Continue Trian
# filename = 'cp_logs/.hdf5'
# model.load_weights(filename)
# model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])

In [13]:
filepath = 'cp_logs/weights.{epoch:03d}-{val_loss:.6f}.hdf5'
log_string = 'tb_logs/tf'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True)
tensorboard = TensorBoard(log_dir=log_string, 
                          histogram_freq=1, 
                          write_graph=False, 
                          write_grads=False, 
                          batch_size=BATCH_SIZE, 
                          write_images=True, 
                          embeddings_freq=1, 
                          embeddings_layer_names=None,
                          embeddings_metadata=None) 

In [12]:
history = model.fit_generator(gen_train_all, 
                              steps_per_epoch=STEPS_PER_EPOCH, 
                              epochs=NUM_EPOCHS, 
                              verbose=1,
                              callbacks=[checkpoint, tensorboard],
                              validation_data=gen_test, 
                              validation_steps=TEST_STEPS)

Epoch 1/512
Epoch 2/512
Epoch 3/512
Epoch 4/512
Epoch 5/512
Epoch 6/512
Epoch 7/512
Epoch 8/512
Epoch 9/512
Epoch 10/512
Epoch 11/512
Epoch 12/512
Epoch 13/512
Epoch 14/512
Epoch 15/512
Epoch 16/512
Epoch 17/512
Epoch 18/512
Epoch 19/512
Epoch 20/512
Epoch 21/512
Epoch 22/512
Epoch 23/512
Epoch 24/512
Epoch 25/512
Epoch 26/512
Epoch 27/512
Epoch 28/512
Epoch 29/512
Epoch 30/512
Epoch 31/512
Epoch 32/512
Epoch 33/512
Epoch 34/512
Epoch 35/512
Epoch 36/512
Epoch 37/512
Epoch 38/512
Epoch 39/512
Epoch 40/512
Epoch 41/512
Epoch 42/512
Epoch 43/512
Epoch 44/512
Epoch 45/512
Epoch 46/512
Epoch 47/512
Epoch 48/512
Epoch 49/512
Epoch 50/512
Epoch 51/512
Epoch 52/512
Epoch 53/512
Epoch 54/512
Epoch 55/512
Epoch 56/512
Epoch 57/512
Epoch 58/512
Epoch 59/512
Epoch 60/512
Epoch 61/512
Epoch 62/512
Epoch 63/512
Epoch 64/512
Epoch 65/512
Epoch 66/512
Epoch 67/512
Epoch 68/512
Epoch 69/512


Epoch 70/512
Epoch 71/512
Epoch 72/512
Epoch 73/512
Epoch 74/512
Epoch 75/512
Epoch 76/512
Epoch 77/512
Epoch 78/512
Epoch 79/512
Epoch 80/512
Epoch 81/512
Epoch 82/512
Epoch 83/512
Epoch 84/512
Epoch 85/512
Epoch 86/512
Epoch 87/512
Epoch 88/512
Epoch 89/512
Epoch 90/512
Epoch 91/512
Epoch 92/512
Epoch 93/512
Epoch 94/512
Epoch 95/512
Epoch 96/512
Epoch 97/512
Epoch 98/512
Epoch 99/512
Epoch 100/512
Epoch 101/512
Epoch 102/512
Epoch 103/512
Epoch 104/512
Epoch 105/512
Epoch 106/512
Epoch 107/512
Epoch 108/512
Epoch 109/512
Epoch 110/512
Epoch 111/512
Epoch 112/512
Epoch 113/512
Epoch 114/512
Epoch 115/512
Epoch 116/512
Epoch 117/512
Epoch 118/512
Epoch 119/512
Epoch 120/512
Epoch 121/512
Epoch 122/512
Epoch 123/512
Epoch 124/512
Epoch 125/512
Epoch 126/512
Epoch 127/512
Epoch 128/512
Epoch 129/512
Epoch 130/512
Epoch 131/512
Epoch 132/512
Epoch 133/512
Epoch 134/512
Epoch 135/512
Epoch 136/512
Epoch 137/512
Epoch 138/512
Epoch 139/512
Epoch 140/512
Epoch 141/512
Epoch 142/512
Epoch 14

Epoch 147/512
Epoch 148/512
Epoch 149/512
Epoch 150/512
Epoch 151/512
Epoch 152/512
Epoch 153/512
Epoch 154/512
Epoch 155/512
Epoch 156/512
Epoch 157/512
Epoch 158/512
Epoch 159/512
Epoch 160/512
Epoch 161/512
Epoch 162/512
Epoch 163/512
Epoch 164/512
Epoch 165/512
Epoch 166/512
Epoch 167/512
Epoch 168/512
Epoch 169/512
Epoch 170/512
Epoch 171/512
Epoch 172/512
Epoch 173/512
Epoch 174/512
Epoch 175/512
Epoch 176/512
Epoch 177/512
Epoch 178/512
Epoch 179/512
Epoch 180/512
Epoch 181/512
Epoch 182/512
Epoch 183/512
Epoch 184/512
Epoch 185/512
Epoch 186/512
Epoch 187/512
Epoch 188/512
Epoch 189/512
Epoch 190/512
Epoch 191/512
Epoch 192/512
Epoch 193/512
Epoch 194/512
Epoch 195/512
Epoch 196/512
Epoch 197/512
Epoch 198/512
Epoch 199/512
Epoch 200/512
Epoch 201/512
Epoch 202/512
Epoch 203/512
Epoch 204/512
Epoch 205/512
Epoch 206/512
Epoch 207/512
Epoch 208/512
Epoch 209/512
Epoch 210/512
Epoch 211/512
Epoch 212/512
Epoch 213/512
Epoch 214/512
Epoch 215/512
Epoch 216/512
Epoch 217/512
Epoch 

Epoch 227/512
Epoch 228/512
Epoch 229/512
Epoch 230/512
Epoch 231/512
Epoch 232/512
Epoch 233/512
Epoch 234/512
Epoch 235/512
Epoch 236/512
Epoch 237/512
Epoch 238/512
Epoch 239/512
Epoch 240/512
Epoch 241/512
Epoch 242/512
Epoch 243/512
Epoch 244/512
Epoch 245/512
Epoch 246/512
Epoch 247/512
Epoch 248/512
Epoch 249/512
Epoch 250/512
Epoch 251/512
Epoch 252/512
Epoch 253/512
Epoch 254/512
Epoch 255/512
Epoch 256/512
Epoch 257/512
Epoch 258/512
Epoch 259/512
Epoch 260/512
Epoch 261/512
Epoch 262/512
Epoch 263/512
Epoch 264/512
Epoch 265/512
Epoch 266/512
Epoch 267/512
Epoch 268/512
Epoch 269/512
Epoch 270/512
Epoch 271/512
Epoch 272/512
Epoch 273/512
Epoch 274/512
Epoch 275/512
Epoch 276/512
Epoch 277/512
Epoch 278/512
Epoch 279/512
Epoch 280/512
Epoch 281/512
Epoch 282/512
Epoch 283/512
Epoch 284/512
Epoch 285/512
Epoch 286/512
Epoch 287/512
Epoch 288/512
Epoch 289/512
Epoch 290/512
Epoch 291/512
Epoch 292/512
Epoch 293/512
Epoch 294/512
Epoch 295/512
Epoch 296/512
Epoch 297/512
Epoch 

Epoch 308/512
Epoch 309/512
Epoch 310/512
Epoch 311/512
Epoch 312/512
Epoch 313/512
Epoch 314/512
Epoch 315/512
Epoch 316/512
Epoch 317/512
Epoch 318/512
Epoch 319/512
Epoch 320/512
Epoch 321/512
Epoch 322/512
Epoch 323/512
Epoch 324/512
Epoch 325/512
Epoch 326/512
Epoch 327/512
Epoch 328/512
Epoch 329/512
Epoch 330/512
Epoch 331/512
Epoch 332/512
Epoch 333/512
Epoch 334/512
Epoch 335/512
Epoch 336/512
Epoch 337/512
Epoch 338/512
Epoch 339/512
Epoch 340/512
Epoch 341/512
Epoch 342/512
Epoch 343/512
Epoch 344/512
Epoch 345/512
Epoch 346/512
Epoch 347/512
Epoch 348/512
Epoch 349/512


Epoch 350/512
Epoch 351/512
Epoch 352/512
Epoch 353/512
Epoch 354/512
Epoch 355/512
Epoch 356/512
Epoch 357/512
Epoch 358/512
Epoch 359/512
Epoch 360/512
Epoch 361/512
Epoch 362/512
Epoch 363/512
Epoch 364/512
Epoch 365/512
Epoch 366/512
Epoch 367/512
Epoch 368/512
Epoch 369/512
Epoch 370/512
Epoch 371/512
Epoch 372/512
Epoch 373/512
Epoch 374/512
Epoch 375/512
Epoch 376/512
Epoch 377/512
Epoch 378/512
Epoch 379/512
Epoch 380/512
Epoch 381/512
Epoch 382/512
Epoch 383/512
Epoch 384/512
Epoch 385/512
Epoch 386/512
Epoch 387/512
Epoch 388/512
Epoch 389/512
Epoch 390/512
Epoch 391/512
Epoch 392/512
Epoch 393/512
Epoch 394/512
Epoch 395/512
Epoch 396/512
Epoch 397/512
Epoch 398/512
Epoch 399/512
Epoch 400/512
Epoch 401/512
Epoch 402/512
Epoch 403/512
Epoch 404/512
Epoch 405/512
Epoch 406/512
Epoch 407/512
Epoch 408/512
Epoch 409/512
Epoch 410/512
Epoch 411/512
Epoch 412/512
Epoch 413/512
Epoch 414/512
Epoch 415/512
Epoch 416/512
Epoch 417/512
Epoch 418/512
Epoch 419/512
Epoch 420/512
Epoch 

Epoch 434/512
Epoch 435/512
Epoch 436/512
Epoch 437/512
Epoch 438/512
Epoch 439/512
Epoch 440/512
Epoch 441/512
Epoch 442/512
Epoch 443/512
Epoch 444/512
Epoch 445/512
Epoch 446/512
Epoch 447/512
Epoch 448/512
Epoch 449/512
Epoch 450/512
Epoch 451/512
Epoch 452/512
Epoch 453/512
Epoch 454/512
Epoch 455/512
Epoch 456/512
Epoch 457/512
Epoch 458/512
Epoch 459/512
Epoch 460/512
Epoch 461/512
Epoch 462/512
Epoch 463/512
Epoch 464/512
Epoch 465/512
Epoch 466/512
Epoch 467/512
Epoch 468/512
Epoch 469/512
Epoch 470/512
Epoch 471/512
Epoch 472/512
Epoch 473/512
Epoch 474/512
Epoch 475/512
Epoch 476/512
Epoch 477/512
Epoch 478/512
Epoch 479/512
Epoch 480/512
Epoch 481/512
Epoch 482/512
Epoch 483/512
Epoch 484/512
Epoch 485/512
Epoch 486/512
Epoch 487/512
Epoch 488/512
Epoch 489/512
Epoch 490/512
Epoch 491/512
Epoch 492/512
Epoch 493/512
Epoch 494/512
Epoch 495/512
Epoch 496/512
Epoch 497/512
Epoch 498/512
Epoch 499/512
Epoch 500/512
Epoch 501/512
Epoch 502/512
Epoch 503/512
Epoch 504/512
Epoch 

# Evaluate

## Naive Evaluate

In [19]:
filename = 'cp_logs/weights.336-2.599974.hdf5'
model.load_weights(filename)

result = np.argmax(model.predict([x_test, tf_test], batch_size=BATCH_SIZE, verbose=1), -1)



In [14]:
preResult = [[index2word[i] for i in sent if i != 0] for sent in result]
actResult = [[index2word[i] for i in sent if i != 0] for sent in y_test]
e1Count = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0]])
e2Count = sum([1 for i in range(395) if actResult[i][1] == preResult[i][1]])
enCount = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0] and actResult[i][1] == preResult[i][1]])
tripleCount = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0] and actResult[i][1] == preResult[i][1] and actResult[i][2] == preResult[i][2]])
print('E1 Accuracy: \t\t%.6f' % (e1Count/395))
print('E2 Accuracy: \t\t%.6f' % (e2Count/395))
print('En Accuracy: \t\t%.6f' % (enCount/395))
print('Triple Accuracy: \t%.6f' % (tripleCount/395))

E1 Accuracy: 		0.600000
E2 Accuracy: 		0.653165
En Accuracy: 		0.362025
Triple Accuracy: 	0.278481


In [15]:
for i in range(395):
    print('Predict: \t%s' % ' '.join(preResult[i]))
    print('Ground-Truth: \t%s' % ' '.join(actResult[i]))
    print('---')

Predict: 	europe italy /location/country/administrative_divisions
Ground-Truth: 	spain italy /location/administrative_division/country
---
Predict: 	westchester connecticut /location/location/contains
Ground-Truth: 	westchester connecticut /location/administrative_division/country
---
Predict: 	ariel israel /people/person/nationality
Ground-Truth: 	dalia israel /people/person/nationality
---
Predict: 	bangalore bihar /location/location/contains
Ground-Truth: 	hyderabad potti /location/location/contains
---
Predict: 	elizabeth north /people/person/place_lived
Ground-Truth: 	kent north /people/person/place_lived
---
Predict: 	ontario waterloo /location/location/contains
Ground-Truth: 	ontario waterloo /location/location/contains
---
Predict: 	baltimore american /business/company/founders
Ground-Truth: 	blackstone stephen /business/company/founders
---
Predict: 	europe italy /location/country/administrative_divisions
Ground-Truth: 	france italy /location/administrative_division/country
--

## Greedy 1-best Search Evaluate

In [68]:
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm

In [69]:
filename = 'cp_logs/weights.336-2.599974.hdf5'
model.load_weights(filename)

In [71]:
preResult = []
for i in tqdm(range(395)):
    pR = []
    tag = 0
    fb_words = np.array([58840])
    x = np.array([x_test[i]])
    
    for j in range(MAX_ADJL_LEN):
        fb_input = pad_sequences([fb_words], maxlen=MAX_ADJL_LEN, padding='post', truncating='post')
        prob = model.predict([x, fb_input], batch_size=BATCH_SIZE)[0][j]
        if j < 2:
            for item in [i for i in x_test[i] if i != 0]:
                prob[item] = float(prob[item]+1)
        if j == 2:
            prob[:58828] = -1
        prob[0] = -1
        prob[tag] = -1
        result = prob.argmax()
        tag = result
        pR.append(result)
        fb_words = np.append(fb_words, result)
    preResult.append(pR)


  0%|          | 0/395 [00:00<?, ?it/s][A
  0%|          | 1/395 [00:00<02:25,  2.70it/s][A
  1%|          | 2/395 [00:00<02:27,  2.66it/s][A
  1%|          | 3/395 [00:01<02:29,  2.62it/s][A
  1%|          | 4/395 [00:01<02:29,  2.61it/s][A
  1%|▏         | 5/395 [00:01<02:32,  2.56it/s][A
100%|██████████| 395/395 [02:18<00:00,  2.93it/s]


In [82]:
preResult = [[index2word[i] for i in sent if i != 0] for sent in preResult]
actResult = [[index2word[i] for i in sent if i != 0] for sent in y_test]
e1Count = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0]])
e2Count = sum([1 for i in range(395) if actResult[i][1] == preResult[i][1]])
enCount = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0] and actResult[i][1] == preResult[i][1]])
tripleCount = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0] and actResult[i][1] == preResult[i][1] and actResult[i][2] == preResult[i][2]])
print('E1 Accuracy: \t\t%.6f' % (e1Count/395))
print('E2 Accuracy: \t\t%.6f' % (e2Count/395))
print('En Accuracy: \t\t%.6f' % (enCount/395))
print('Triple Accuracy: \t%.6f' % (tripleCount/395))

E1 Accuracy: 		0.291139
E2 Accuracy: 		0.281013
En Accuracy: 		0.139241
Triple Accuracy: 	0.113924


In [84]:
for i in range(395):
    print('Predict: \t%s' % ' '.join(preResult[i]))
    print('Ground-Truth: \t%s' % ' '.join(actResult[i]))
    print('---')

Predict: 	mexico spain /location/country/administrative_divisions
Ground-Truth: 	spain italy /location/administrative_division/country
---
Predict: 	westchester long /location/location/contains
Ground-Truth: 	westchester connecticut /location/administrative_division/country
---
Predict: 	israel saudi /location/location/contains
Ground-Truth: 	dalia israel /people/person/nationality
---
Predict: 	india university /location/location/contains
Ground-Truth: 	hyderabad potti /location/location/contains
---
Predict: 	north bush /location/location/contains
Ground-Truth: 	kent north /people/person/place_lived
---
Predict: 	ontario waterloo /location/location/contains
Ground-Truth: 	ontario waterloo /location/location/contains
---
Predict: 	stephen a. /business/person/company
Ground-Truth: 	blackstone stephen /business/company/founders
---
Predict: 	mexico spain /location/country/administrative_divisions
Ground-Truth: 	france italy /location/administrative_division/country
---
Predict: 	iran ru

## Beam Search Evaluate

In [88]:
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm

In [89]:
filename = 'cp_logs/weights.336-2.599974.hdf5'
model.load_weights(filename)

In [126]:
def beamsearch(NUM_BEAMS):
    preResult = []
    for i in tqdm(range(395)):
        fb_words = np.array([58840])
        x = np.array([x_test[i]])
    
        st_words = np.array([58840])
        st_input = pad_sequences([st_words], maxlen=MAX_ADJL_LEN, padding='post', truncating='post')
        prob = model.predict([x, st_input], batch_size=BATCH_SIZE)[0][0]
        prob[0] = -1
        for item in [i for i in x_test[i] if i != 0]:
            prob[item] = float(prob[item]+1)
        beam = {}
        for _ in range(NUM_BEAMS):
            b = prob.argmax()
            beam[b] = float(prob[b])
            prob[b] = -1
        
        allResult = []
        for b, s in beam.items():
            pR = [b]
            fb_words = np.append(fb_words, b)
            tag = b
            for j in range(1, MAX_ADJL_LEN):
                fb_input = pad_sequences([fb_words], maxlen=MAX_ADJL_LEN, padding='post', truncating='post')
                prob = model.predict([x, fb_input], batch_size=BATCH_SIZE)[0][j]
                if j == 1:
                    for item in [i for i in x_test[i] if i != 0]:
                        prob[item] = float(prob[item]+1)
                if j == 2:
                    prob[:58828] = -1
                prob[0] = -1
                prob[tag] = -1
                result = prob.argmax()
                beam[b] += float(prob[result])
                tag = result
                pR.append(result)
                fb_words = np.append(fb_words, result)            
            allResult.append(pR)
        pR = sum([i for i in allResult if i[0] == max(beam, key=lambda x: beam[x])], [])
        preResult.append(pR)

    preResult = [[index2word[i] for i in sent if i != 0] for sent in preResult]
    actResult = [[index2word[i] for i in sent if i != 0] for sent in y_test]
    e1Count = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0]])
    e2Count = sum([1 for i in range(395) if actResult[i][1] == preResult[i][1]])
    enCount = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0] and actResult[i][1] == preResult[i][1]])
    reCount = sum([1 for i in range(395) if actResult[i][2] == preResult[i][2]])
    tripleCount = sum([1 for i in range(395) if actResult[i][0] == preResult[i][0] and actResult[i][1] == preResult[i][1] and actResult[i][2] == preResult[i][2]])
    print('Beam: \t\t\t%d' % NUM_BEAMS)
    print('E1 Accuracy: \t\t%.6f' % (e1Count/395))
    print('E2 Accuracy: \t\t%.6f' % (e2Count/395))
    print('En Accuracy: \t\t%.6f' % (enCount/395))
    print('Re Accuracy: \t\t%.6f' % (reCount/395))
    print('Triple Accuracy: \t%.6f' % (tripleCount/395))
    print('--------------------------------')

In [128]:
for NUM_BEAMS in range(1, 3):
    beamsearch(NUM_BEAMS)

100%|██████████| 395/395 [02:13<00:00,  2.97it/s]
  0%|          | 0/395 [00:00<?, ?it/s]

Beam: 			1
E1 Accuracy: 		0.291139
E2 Accuracy: 		0.281013
En Accuracy: 		0.139241
Re Accuracy: 		0.506329
Triple Accuracy: 	0.113924
--------------------------------


100%|██████████| 395/395 [03:46<00:00,  1.36it/s]

Beam: 			2
E1 Accuracy: 		0.278481
E2 Accuracy: 		0.298734
En Accuracy: 		0.113924
Re Accuracy: 		0.506329
Triple Accuracy: 	0.075949
--------------------------------





In [None]:
for i in range(395):
    print('Predict: \t%s' % ' '.join(preResult[i]))
    print('Ground-Truth: \t%s' % ' '.join(actResult[i]))
    print('---')