# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Checkpoint" data-toc-modified-id="Checkpoint-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Checkpoint</a></div><div class="lev1 toc-item"><a href="#Build-Model" data-toc-modified-id="Build-Model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Build Model</a></div><div class="lev2 toc-item"><a href="#Import-Libraries" data-toc-modified-id="Import-Libraries-21"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Import Libraries</a></div><div class="lev2 toc-item"><a href="#Set-Hyperparameters" data-toc-modified-id="Set-Hyperparameters-22"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Set Hyperparameters</a></div><div class="lev2 toc-item"><a href="#Build-Graph" data-toc-modified-id="Build-Graph-23"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Build Graph</a></div>

# Checkpoint

In [1]:
import h5py

In [2]:
# Directory holding the preprocessed HDF5 files. Kept in one place so the
# notebook can be pointed at another location by editing a single line.
DATA_DIR = '/Users/lizhn7/Downloads/EXPERIMENT/COPA/RN/data'

# Embedding matrix; feature_map() passes it to the EMBEDDING layer as its
# initial weights.
with h5py.File(DATA_DIR + '/embedding.h5', 'r') as fh:
    embedding = fh['embedding'][:]

# Training arrays. Slicing with [:] reads each dataset into a NumPy array,
# so the values remain usable after the file is closed.
with h5py.File(DATA_DIR + '/train.h5', 'r') as fh:
    xpTrain = fh['xpTrain'][:]
    xaTrain = fh['xaTrain'][:]
    # Fix: this used to read fh['xaTrain'], which made xqTrain a duplicate of
    # xaTrain. Assumes train.h5 stores this array under 'xqTrain', mirroring
    # the 'xqTest' key in test.h5 -- TODO confirm against the file.
    xqTrain = fh['xqTrain'][:]
    yceTrain = fh['yceTrain'][:]
    yhiTrain = fh['yhiTrain'][:]

# Test arrays.
with h5py.File(DATA_DIR + '/test.h5', 'r') as fh:
    xpTest = fh['xpTest'][:]
    xa1Test = fh['xa1Test'][:]
    xa2Test = fh['xa2Test'][:]
    xqTest = fh['xqTest'][:]
    yTest = fh['yTest'][:]

In [23]:
# Display the first row of xpTrain; in the recorded output the zeros sit on
# the left of the row.
xpTrain[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0, 2984, 2985,
          1,  749], dtype=int32)

In [24]:
# Display the first row of xaTrain; in the recorded output the zeros sit on
# the right of the row.
xaTrain[0]

array([   1,   69,  748,    1, 1519,  999,  749,    0,    0,    0,    0,
          0,    0], dtype=int32)

# Build Model

## Import Libraries

In [141]:
from keras.layers import Lambda, Input, Embedding, Dropout, GRU, RepeatVector, Concatenate, concatenate, TimeDistributed, Dense, BatchNormalization, Merge
from keras.models import Model
from keras.models import Sequential
import keras.backend as K
from keras.regularizers import l2
from keras.constraints import maxnorm
from keras.callbacks import*
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
# Imported explicitly: later cells use `np`, which no other import here binds by name.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

## Set Hyperparameters

In [94]:
# Sequence lengths used for the Input shapes (main text / question).
MAX_LEN, MAX_Q_LEN = 13, 6

# Vocabulary sizes handed to the two Embedding layers.
VOCAB_SIZE, VOCAB_Q_SIZE = 3371, 11

# Embedding widths (main text / question).
EMBEDDING_SIZE, EMBEDDING_Q_SIZE = 300, 8

# GRU output sizes (main text / question).
GRU_SIZE, GRU_Q_SIZE = 192, 16

# Width of the Dense layer inside the time-distributed MLP.
HIDDEN_SIZE = 200

# Regularisation: dropout rate, L2 factor and max-norm bound on the MLP kernel.
DROPOUT_RATE = 0.2
L2_NORM = 1e-4
WEIGHT_CONSTRAINT = 3.0

In [None]:
# Additional settings.
#
# Fix: this cell used to re-assign MAX_LEN (= 22), VOCAB_SIZE and
# DROPOUT_RATE (= 0.5). The data rows shown earlier are 13 long and the
# recorded model summary has an input of 26 = 2 * 13, so running this cell in
# order would have replaced the MAX_LEN the graph is built with. Those three
# names are now defined only once, in the hyperparameter cell above.
#
# NOTE(review): none of the names below is read by the graph-building cells
# visible in this notebook -- presumably they belong to a training step that
# is not shown; confirm before pruning.
SEED = 42
EMBEDDING_DIM = 300
TUNE = False
BATCH_SIZE = 50
NUM_EPOCHS = 10
NUM_FILTER = 100
FILTER_SIZES = (3, 4, 5)
DELTA = 0.009
LAMBDA = 0.0001
LEARNING_RATE = 0.01

## Build Graph

In [79]:
def attention_context(x):
    """
    Compute context with attention weight

    `x` is split along axis 1 into two consecutive blocks of MAX_LEN steps:
    `pre` (steps 0 .. MAX_LEN-1) and `alt` (steps MAX_LEN .. 2*MAX_LEN-1).
    For every `alt` step, the dot products with all `pre` steps are passed
    through a softmax and used to average the `pre` steps into one context
    vector. The result pairs each context vector with its `alt` step by
    concatenating them on the last axis.

    x: 3D tensor with at least 2 * MAX_LEN steps on axis 1. Presumably the
       (batch, 2 * MAX_LEN, GRU_SIZE) output of the GRU in feature_map() --
       confirm against the caller.

    Returns a 3D tensor with MAX_LEN steps on axis 1 whose last axis is twice
    as wide as that of `x`.
    """
    pre = x[:, :MAX_LEN, :]
    # Fix: this slice used to be x[:, :MAX_LEN, :] too, so `alt` was a copy of
    # `pre` and the second half of the sequence was never read. The explicit
    # upper bound keeps `alt` at exactly MAX_LEN steps even if `x` is longer.
    alt = x[:, MAX_LEN:2 * MAX_LEN, :]
    # energies[b, j, i] = <alt_j, pre_i>. Rows are indexed by `alt` steps so
    # that the softmax below normalises over `pre` steps, which is the axis
    # the weighted sum contracts.
    attention_energies = K.batch_dot(alt, pre, axes=(2, 2))
    # K.softmax works on the last axis; flatten to 2D so each row is one
    # `alt` step's scores, then restore the 3D layout.
    attention_energies = K.reshape(attention_energies, (-1, MAX_LEN))
    attention_weights = K.softmax(attention_energies)
    attention_weights = K.reshape(attention_weights, (-1, MAX_LEN, MAX_LEN))
    # context[b, j, :] = sum_i weights[b, j, i] * pre[b, i, :]
    context = K.batch_dot(attention_weights, pre, axes=(2, 1))
    return K.concatenate((context, alt))

In [116]:
def feature_map():
    """
    Assemble the attention-GRU sub-model that produces the feature maps.

    Pipeline: EMBEDDING (initialised from the global `embedding` matrix, not
    trainable, zero treated as mask value) -> GRU returning every timestep ->
    Dropout -> Lambda wrapping attention_context().

    Side effect: K.clear_session() is called first, so whatever was built in
    the backend session before this call is discarded.

    Returns a keras Model from the 'INPUT' tensor (MAX_LEN*2 ids per row) to
    the 'ATTENTION_MAPS' output.
    """
    K.clear_session()

    token_ids = Input(shape=(MAX_LEN*2,), name='INPUT')

    embedded = Embedding(
        VOCAB_SIZE,
        EMBEDDING_SIZE,
        weights=[embedding],
        mask_zero=True,
        trainable=False,
        name='EMBEDDING',
    )(token_ids)

    # Same rate for input and recurrent dropout, same L2 factor for both
    # weight matrices.
    gru_options = dict(
        return_sequences=True,
        implementation=0,
        dropout=DROPOUT_RATE,
        recurrent_dropout=DROPOUT_RATE,
        kernel_regularizer=l2(L2_NORM),
        recurrent_regularizer=l2(L2_NORM),
        name='GRU',
    )
    hidden = GRU(GRU_SIZE, **gru_options)(embedded)
    hidden = Dropout(DROPOUT_RATE, name='DROPOUT_FM')(hidden)

    maps = Lambda(attention_context, name='ATTENTION_MAPS')(hidden)
    return Model(inputs=[token_ids], outputs=[maps])

In [81]:
def question_encoder():
    """
    Assemble the question encoder.

    A Sequential model with a trainable EMBEDDING_Q layer (zero treated as
    mask value, MAX_Q_LEN ids per row) followed by GRU_Q, which returns only
    its final output of size GRU_Q_SIZE.

    Unlike feature_map(), this builder does not call K.clear_session();
    presumably so that a model built just before it is not thrown away.
    """
    embed_q = Embedding(
        VOCAB_Q_SIZE,
        EMBEDDING_Q_SIZE,
        mask_zero=True,
        input_length=MAX_Q_LEN,
        trainable=True,
        name='EMBEDDING_Q',
    )
    gru_q = GRU(
        GRU_Q_SIZE,
        return_sequences=False,
        implementation=0,
        dropout=DROPOUT_RATE,
        recurrent_dropout=DROPOUT_RATE,
        kernel_regularizer=l2(L2_NORM),
        recurrent_regularizer=l2(L2_NORM),
        name='GRU_Q',
    )
    # Passing the layers to the constructor adds them in list order.
    return Sequential([embed_q, gru_q])

In [82]:
# Build the attention-GRU sub-model on its own and print its layer table.
# NOTE(review): the recorded table below lists the dropout layer as 'DROPOUT',
# while feature_map() names it 'DROPOUT_FM' -- the output looks stale; re-run
# this cell to refresh it.
model = feature_map()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
INPUT (InputLayer)           (None, 26)                0         
_________________________________________________________________
EMBEDDING (Embedding)        (None, 26, 300)           1011300   
_________________________________________________________________
GRU (GRU)                    (None, 26, 192)           283968    
_________________________________________________________________
DROPOUT (Dropout)            (None, 26, 192)           0         
_________________________________________________________________
ATTENTION_MAPS (Lambda)      (None, 13, 384)           0         
Total params: 1,295,268
Trainable params: 283,968
Non-trainable params: 1,011,300
_________________________________________________________________


In [148]:
def relation_networks():
    """
    Build relation networks

    Wires the two sub-models together:
      1. INPUT_P and INPUT_A (MAX_LEN ids each) are concatenated on axis 1
         and fed to the feature_map() model.
      2. INPUT_Q (MAX_Q_LEN ids) goes through question_encoder(); its output
         is repeated MAX_LEN times so it can be appended to every feature-map
         step.
      3. A time-distributed Dense layer (ELU), batch normalisation and
         dropout are applied per step, and the steps are summed.

    Returns the summed tensor, shape (batch, HIDDEN_SIZE). Note that this is
    a backend tensor, not a keras Model, and the Input tensors are not
    returned. TODO: wrap into a Model once the output head is added.
    """
    AttGRU = feature_map()
    QueENC = question_encoder()
    # Fix: INPUT_P used to be declared with MAX_LEN*2 steps, so INPUT_P +
    # INPUT_A came to MAX_LEN*3 steps and no longer matched the MAX_LEN*2
    # input that feature_map() declares.
    inputs_p = Input(shape=(MAX_LEN,), name='INPUT_P')
    inputs_a = Input(shape=(MAX_LEN,), name='INPUT_A')
    inputs = Concatenate(axis=1, name='INPUT')([inputs_p, inputs_a])
    inputs_q = Input(shape=(MAX_Q_LEN,), name='INPUT_Q')
    feaMap = AttGRU(inputs)
    queSeq = QueENC(inputs_q)
    # Tile the question vector along the step axis to line up with feaMap.
    queSeq = RepeatVector(MAX_LEN, name='Q_ENC')(queSeq)
    feaComb = concatenate([feaMap, queSeq], axis=-1, name='FEA_COMB')
    mlp = TimeDistributed(Dense(HIDDEN_SIZE,
                                activation='elu',
                                kernel_regularizer=l2(L2_NORM),
                                kernel_constraint=maxnorm(WEIGHT_CONSTRAINT)),
                          name='MLP')(feaComb)
    mlp = BatchNormalization(name='BN')(mlp)
    mlp = Dropout(DROPOUT_RATE, name='DROPOUT_MLP')(mlp)
    # Collapse the step axis by summation.
    ewSum = Lambda(lambda x: K.sum(x, axis=1), name='ELEMENT-WISE_SUM')(mlp)
    return ewSum

In [149]:
# NOTE: relation_networks() returns its final tensor, not a keras Model, so
# despite the name `model` holds a tensor from here on.
model = relation_networks()

In [150]:
# Display the object returned by relation_networks() to check its shape.
model

<tf.Tensor 'ELEMENT-WISE_SUM/Sum:0' shape=(?, 200) dtype=float32>

In [121]:
# Scratch data: 100 samples of shape (13, 200), uniform in [0, 1). The 13 and
# 200 coincide with MAX_LEN and HIDDEN_SIZE -- presumably a stand-in for the
# per-step MLP output; confirm.
# Drawn from a dedicated, seeded generator so the cell gives the same array
# on every run and leaves the global NumPy random state untouched.
a = np.random.RandomState(42).random_sample((100, 13, 200))

In [124]:
# Start from an empty list.
b = list()

In [125]:
# Split `a` along its first axis into a list of sub-arrays.
# Rebuilding the list in a single expression keeps the cell idempotent: the
# former append loop added another copy of every row each time it was re-run.
b = list(a)

In [129]:
# Shape of one element of `b`, i.e. of a single slice of `a` along axis 0.
b[0].shape

(13, 200)