# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Checkpoint" data-toc-modified-id="Checkpoint-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Checkpoint</a></div><div class="lev1 toc-item"><a href="#Import-Libraries" data-toc-modified-id="Import-Libraries-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Import Libraries</a></div><div class="lev1 toc-item"><a href="#Set-Hyperparameters" data-toc-modified-id="Set-Hyperparameters-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Set Hyperparameters</a></div><div class="lev1 toc-item"><a href="#Build-Graph" data-toc-modified-id="Build-Graph-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Build Graph</a></div><div class="lev1 toc-item"><a href="#Train" data-toc-modified-id="Train-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Train</a></div>

# Checkpoint

In [1]:
import os
import pickle

import h5py

In [2]:
# Restore the vocabulary lookup tables saved by an earlier step.
# The pickle is expected to hold a 2-tuple that unpacks into
# (word2index, index2word).
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load 'index.pkl' if it was produced by a trusted source.
with open('index.pkl', 'rb') as fp:
    word2index, index2word = pickle.load(fp)

In [3]:
# Read the training inputs and targets from 'train.h5'.
# The trailing [:] copies each dataset fully into memory, so the arrays
# stay usable after the file is closed at the end of the `with` block.
with h5py.File('train.h5', 'r') as fh:
    xTrain = fh['xTrain'][:]
    yTrain = fh['yTrain'][:]

# Import Libraries

In [4]:
from keras.layers import Input, Embedding, Dense, Flatten, BatchNormalization, Activation
from keras.models import Model
import keras.backend as K
from keras.callbacks import*
from tqdm import tqdm
import numpy as np

Using TensorFlow backend.


# Set Hyperparameters

In [8]:
# Size of the id space: used both as the Embedding input dimension and as
# the width of the softmax output layer.
# NOTE(review): the +1 presumably reserves one extra id (e.g. 0 for
# padding/unknown) -- confirm against how index.pkl was built.
VOCAB_SIZE = len(word2index)+1
# Length of each learned embedding vector.
EMBEDDING_DIM = 256
# Number of units in each of the two hidden Dense layers.
HIDDEN_SIZE = 512
# Samples per batch produced by the training generator.
BATCH_SIZE = 1024
# Number of epochs passed to fit_generator.
NUM_EPOCHS = 256
# Batches drawn from the generator per epoch. An "epoch" here is this many
# batches, which is not necessarily one full pass over the training data.
STEPS_PER_EPOCH = 2048

# Build Graph

In [9]:
def build():
    """
    Create the embedding sub-model.

    Resets the Keras backend session, then wraps a single trainable
    Embedding layer (VOCAB_SIZE ids -> EMBEDDING_DIM-sized vectors, no
    masking of id 0) in its own Model so it can be called like a layer.

    Returns:
        A Keras Model taking a variable-length sequence of int64 ids and
        returning the corresponding embedding vectors.
    """
    # Drop any previously built graph state before creating new layers.
    K.clear_session()

    token_ids = Input(shape=(None,), dtype='int64')
    lookup = Embedding(input_dim=VOCAB_SIZE,
                       output_dim=EMBEDDING_DIM,
                       mask_zero=False,
                       trainable=True)
    return Model(inputs=token_ids, outputs=lookup(token_ids))

In [10]:
# Instantiate the embedding sub-model. build() also clears the Keras
# session, so this must run before any other layers are created.
embedding = build()

In [14]:
# Classifier over the vocabulary: one input id -> embedding -> two hidden
# Dense/BatchNorm/ReLU stages -> Dense/BatchNorm/softmax over VOCAB_SIZE ids.
# NOTE(review): what yTrain encodes (e.g. a context or next word) is not
# visible here -- confirm the training objective against the data prep step.

# Each sample is a single int64 id.
inputs = Input(shape=(1,), name='INPUT', dtype='int64')
# Look up the embedding with the shared sub-model, then drop the length-1
# sequence axis so the Dense layers receive a flat vector per sample.
embSeq = embedding(inputs)
embSeq = Flatten(name='FLATTEN')(embSeq)
# Hidden stage 1: linear Dense, then batch normalisation, then ReLU.
dense1 = Dense(HIDDEN_SIZE, activation=None, name='DENSE_1')(embSeq)
bn1 = BatchNormalization(name='BN_1')(dense1)
act1 = Activation('relu', name='RELU_1')(bn1)
# Hidden stage 2: same pattern as stage 1.
dense2 = Dense(HIDDEN_SIZE, activation=None, name='DENSE_2')(act1)
bn2 = BatchNormalization(name='BN_2')(dense2)
act2 = Activation('relu', name='RELU_2')(bn2)
# Output stage: one logit per id, batch-normalised before the softmax.
dense3 = Dense(VOCAB_SIZE, activation=None, name='DENSE_3')(act2)
bn3 = BatchNormalization(name='BN_3')(dense3)
out = Activation('softmax', name='OUTPUT')(bn3)
model = Model(inputs=inputs, outputs=out)
# The sparse loss takes integer class ids as targets (no one-hot encoding).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

In [15]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
INPUT (InputLayer)           (None, 1)                 0         
_________________________________________________________________
model_1 (Model)              multiple                  715520    
_________________________________________________________________
FLATTEN (Flatten)            (None, 256)               0         
_________________________________________________________________
DENSE_1 (Dense)              (None, 512)               131584    
_________________________________________________________________
BN_1 (BatchNormalization)    (None, 512)               2048      
_________________________________________________________________
RELU_1 (Activation)          (None, 512)               0         
_________________________________________________________________
DENSE_2 (Dense)              (None, 512)               262656    
__________

# Train

In [16]:
# Checkpoint file name pattern; Keras fills in the epoch number and the
# training loss when a checkpoint is written.
filepath = 'cp_logs/weights.{epoch:03d}-{loss:.6f}.hdf5'
# Create the checkpoint directory up front. Otherwise a missing directory
# would only surface as an error when the first checkpoint is saved, i.e.
# after a whole epoch of training has already been spent.
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save weights only when the training loss improves (lower is better).
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
# TensorBoard event files for this run.
log_string = 'tb_logs/2'
tensorboard = TensorBoard(log_dir=log_string)
callbacks_list = [checkpoint, tensorboard]

In [17]:
def data_generator(data, label, batch_size):
    """
    Endlessly yield consecutive (x, y) mini-batches of data/label.

    Samples are visited in order, batch_size at a time; once the end of
    the data has been reached the generator starts over from the first
    sample.

    Args:
        data: sliceable sequence of inputs, one value per sample.
        label: sliceable sequence of targets aligned with data.
        batch_size: maximum number of samples per batch.

    Yields:
        Tuple (x, y) of float64 arrays of shape (k, 1). k equals
        batch_size, except for the last batch of a pass, which holds only
        the remaining samples (it is NOT padded with zero rows, which would
        otherwise be trained on as spurious 0 -> 0 examples).

    Raises:
        ValueError: if data is empty (raised on the first next() call).
    """
    num_samples = len(data)
    if num_samples == 0:
        raise ValueError('data_generator needs at least one sample')

    start = 0
    while True:
        # Wrap around once a full pass over the data has been made.
        if start >= num_samples:
            start = 0
        stop = min(start + batch_size, num_samples)
        # Slice the whole batch at once instead of copying sample by sample;
        # reshape gives one column per sample for both 1-D and (N, 1) inputs.
        x = np.asarray(data[start:stop], dtype=np.float64).reshape(-1, 1)
        y = np.asarray(label[start:stop], dtype=np.float64).reshape(-1, 1)
        start += batch_size
        yield (x, y)

In [18]:
# Endless batch generator over the training arrays; consumed by fit_generator.
gen_train = data_generator(xTrain, yTrain, BATCH_SIZE)

In [19]:
# Train from the generator: each epoch draws STEPS_PER_EPOCH batches, and the
# callbacks write checkpoints and TensorBoard logs along the way.
# NOTE(review): newer Keras releases deprecate fit_generator in favour of
# fit(), which accepts generators directly -- confirm against the installed
# version before upgrading.
history = model.fit_generator(gen_train,
                              steps_per_epoch=STEPS_PER_EPOCH,
                              epochs=NUM_EPOCHS,
                              callbacks=callbacks_list)

Epoch 1/256
Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256
Epoch 10/256
Epoch 11/256
Epoch 12/256
Epoch 13/256
Epoch 14/256
Epoch 15/256
Epoch 16/256
Epoch 17/256
Epoch 18/256
Epoch 19/256
Epoch 20/256
Epoch 21/256
Epoch 22/256
Epoch 23/256
Epoch 24/256
Epoch 25/256
Epoch 26/256
Epoch 27/256
Epoch 28/256
Epoch 29/256
Epoch 30/256
Epoch 31/256
Epoch 32/256
Epoch 33/256
Epoch 34/256
Epoch 35/256
Epoch 36/256
Epoch 37/256
Epoch 38/256
Epoch 39/256
Epoch 40/256
Epoch 41/256
Epoch 42/256
Epoch 43/256
Epoch 44/256
Epoch 45/256
Epoch 46/256
Epoch 47/256
Epoch 48/256
Epoch 49/256
Epoch 50/256
Epoch 51/256
Epoch 52/256
Epoch 53/256
Epoch 54/256
Epoch 55/256
Epoch 56/256
Epoch 57/256
Epoch 58/256
Epoch 59/256
Epoch 60/256
Epoch 61/256
Epoch 62/256
Epoch 63/256
Epoch 64/256
Epoch 65/256
Epoch 66/256
Epoch 67/256
Epoch 68/256
Epoch 69/256
Epoch 70/256
Epoch 71/256
Epoch 72/256
Epoch 73/256
Epoch 74/256
Epoch 75/256
Epoch 76/256
Epoch 77/256


Epoch 78/256
Epoch 79/256
Epoch 80/256
Epoch 81/256
Epoch 82/256
Epoch 83/256
Epoch 84/256
Epoch 85/256
Epoch 86/256
Epoch 87/256
Epoch 88/256
Epoch 89/256
Epoch 90/256
Epoch 91/256
Epoch 92/256
Epoch 93/256
Epoch 94/256
Epoch 95/256
Epoch 96/256
Epoch 97/256
Epoch 98/256
Epoch 99/256
Epoch 100/256
Epoch 101/256
Epoch 102/256
Epoch 103/256
Epoch 104/256
Epoch 105/256
Epoch 106/256
Epoch 107/256
Epoch 108/256
Epoch 109/256
Epoch 110/256
Epoch 111/256
Epoch 112/256
Epoch 113/256
Epoch 114/256
Epoch 115/256
Epoch 116/256
Epoch 117/256
Epoch 118/256
Epoch 119/256
Epoch 120/256
Epoch 121/256
Epoch 122/256
Epoch 123/256
Epoch 124/256
Epoch 125/256
Epoch 126/256
Epoch 127/256
Epoch 128/256
Epoch 129/256
Epoch 130/256
Epoch 131/256
Epoch 132/256
Epoch 133/256
Epoch 134/256
Epoch 135/256
Epoch 136/256
Epoch 137/256
Epoch 138/256
Epoch 139/256
Epoch 140/256
Epoch 141/256
Epoch 142/256
Epoch 143/256
Epoch 144/256
Epoch 145/256
Epoch 146/256
Epoch 147/256
Epoch 148/256
Epoch 149/256
Epoch 150/256


Epoch 160/256
Epoch 161/256
Epoch 162/256
Epoch 163/256
Epoch 164/256
Epoch 165/256
Epoch 166/256
Epoch 167/256
Epoch 168/256
Epoch 169/256
Epoch 170/256
Epoch 171/256
Epoch 172/256
Epoch 173/256
Epoch 174/256
Epoch 175/256
Epoch 176/256
Epoch 177/256
Epoch 178/256
Epoch 179/256
Epoch 180/256
Epoch 181/256
Epoch 182/256
Epoch 183/256
Epoch 184/256
Epoch 185/256
Epoch 186/256
Epoch 187/256
Epoch 188/256
Epoch 189/256
Epoch 190/256
Epoch 191/256
Epoch 192/256
Epoch 193/256
Epoch 194/256
Epoch 195/256
Epoch 196/256
Epoch 197/256
Epoch 198/256
Epoch 199/256
Epoch 200/256
Epoch 201/256
Epoch 202/256
Epoch 203/256
Epoch 204/256
Epoch 205/256
Epoch 206/256
Epoch 207/256
Epoch 208/256
Epoch 209/256
Epoch 210/256
Epoch 211/256
Epoch 212/256
Epoch 213/256
Epoch 214/256
Epoch 215/256
Epoch 216/256
Epoch 217/256
Epoch 218/256
Epoch 219/256
Epoch 220/256
Epoch 221/256
Epoch 222/256
Epoch 223/256
Epoch 224/256
Epoch 225/256
Epoch 226/256
Epoch 227/256
Epoch 228/256
Epoch 229/256
Epoch 230/256
Epoch 

Epoch 246/256
Epoch 247/256
Epoch 248/256
Epoch 249/256
Epoch 250/256
Epoch 251/256
Epoch 252/256
Epoch 253/256
Epoch 254/256
Epoch 255/256
Epoch 256/256
