In [0]:
"""
We use the following lines because we are running on Google Colab.
If you are running the notebook on a local computer, you don't need this cell.
"""
import os

from google.colab import drive

# Mount Google Drive inside the Colab VM so the project files become reachable.
drive.mount('/content/gdrive')

# Work from the project's `main` directory: the paths configured later in this
# notebook are relative (e.g. '../model/esim') and resolve against it.
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/text_matching/snli/main')

In [0]:
import os
import pprint
from pathlib import Path

import numpy as np
import tensorflow as tf

print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())

TensorFlow Version 1.12.0
GPU Enabled: True


In [0]:
def bi_lstm(x1, x2, seq_len_1, seq_len_2):
  """Encode two sequences with one shared bidirectional fused LSTM.

  A forward and a time-reversed backward `LSTMBlockFusedCell` are created once
  per call of this function and applied to both inputs, so `x1` and `x2` are
  encoded with the same weights. The cell width is read from the module-level
  `params['hidden_dim']`.

  Args:
    x1, x2: rank-3 float tensors (the first two axes are swapped before and
      after the LSTM) -- assumed to be (batch, time, dim).
    seq_len_1, seq_len_2: per-example lengths forwarded as `sequence_length`.

  Returns:
    A pair of tensors with the first two axes in the same order as the inputs
    and the forward/backward outputs concatenated on the last axis.
  """
  hidden = params['hidden_dim']
  fw_cell = tf.contrib.rnn.LSTMBlockFusedCell(hidden, name='lstm_fused_fw')
  bw_cell = tf.contrib.rnn.TimeReversedFusedRNN(
      tf.contrib.rnn.LSTMBlockFusedCell(hidden, name='lstm_fused_bw'))

  def encode(x, seq_len):
    # The cells are fed with the first two axes swapped.
    swapped = tf.transpose(x, [1, 0, 2])
    fw_out, _ = fw_cell(swapped, dtype=tf.float32, sequence_length=seq_len)
    bw_out, _ = bw_cell(swapped, dtype=tf.float32, sequence_length=seq_len)
    both = tf.concat([fw_out, bw_out], -1)
    # Swap the axes back to the caller's layout.
    return tf.transpose(both, [1, 0, 2])

  # Evaluated left to right, so the first input is encoded first.
  return encode(x1, seq_len_1), encode(x2, seq_len_2)


def masked_attention(x, align, mask, tile_len):
  """Softmax-normalise alignment scores under a mask and attend over `x`.

  Args:
    x: values to be averaged; multiplied on the right of the attention weights.
    align: alignment scores -- assumed (batch, tile_len, len_x); the softmax
      runs over the last axis.
    mask: per-position mask for `x` -- assumed (batch, len_x); positions equal
      to 0 are excluded from the softmax.
    tile_len: number of query positions; the mask is repeated this many times
      along a new axis 1 so that it lines up with `align`.

  Returns:
    `softmax(masked align) @ x`.
  """
  mask = tf.tile(tf.expand_dims(mask, 1), [1, tile_len, 1])
  # Use the most negative *finite* value rather than -inf: masked positions
  # still receive zero weight after the softmax, but a row whose mask is all
  # zeros (e.g. an empty / all-padding sequence) yields finite weights instead
  # of NaN, which would otherwise propagate into the loss of the whole batch.
  pad = tf.fill(tf.shape(align), align.dtype.min)
  align = tf.where(tf.equal(mask, 0), pad, align)
  align = tf.nn.softmax(align)
  return tf.matmul(align, x)

  
def soft_align_attention(x1, x2, mask1, mask2):
  """Cross-attend two sequences to each other.

  One score matrix `x1 @ x2^T` is computed and reused (transposed) for the
  opposite direction; each direction is normalised by `masked_attention`
  using the mask of the sequence being attended over.

  Args:
    x1, x2: rank-3 tensors -- assumed (batch, len, dim) with equal `dim`.
    mask1, mask2: masks for `x1` / `x2`; zeros mark positions to ignore.

  Returns:
    (x1_, x2_): for every position of `x1` a weighted sum over `x2`, and for
    every position of `x2` a weighted sum over `x1`.
  """
  len1 = tf.shape(x1)[1]
  len2 = tf.shape(x2)[1]

  scores_1to2 = tf.matmul(x1, x2, transpose_b=True)
  scores_2to1 = tf.transpose(scores_1to2, [0, 2, 1])

  attended_1 = masked_attention(x2, scores_1to2, mask2, len1)
  attended_2 = masked_attention(x1, scores_2to1, mask1, len2)
  return attended_1, attended_2


def attention_pooling(x, masks, dense1, dense2, dropout, is_training):
  """Pool a sequence into one vector with learned attention weights.

  Args:
    x: rank-3 tensor -- assumed (batch, len, dim).
    masks: mask aligned with the first two axes of `x`; positions equal to 0
      are excluded from the softmax.
    dense1, dense2: callables applied in sequence to `x`; the result must have
      a trailing axis of size 1 (it is squeezed away to obtain one score per
      position).
    dropout: callable accepting `(tensor, training=...)`, applied to the
      attention probabilities.
    is_training: forwarded to `dropout` as `training`.

  Returns:
    The attention-weighted sum of `x` over axis 1.
  """
  # alignment
  align = dense2(dense1(x))
  align = tf.squeeze(align, -1)

  # masking
  # The most negative finite value is used instead of -inf so that a row whose
  # mask is entirely zero produces finite weights rather than NaN; for every
  # other row the masked positions still get exactly zero probability.
  paddings = tf.fill(tf.shape(align), align.dtype.min)
  align = tf.where(tf.equal(masks, 0), paddings, align)

  # probability
  align = tf.nn.softmax(align)
  align = dropout(align, training=is_training)
  align = tf.expand_dims(align, -1)

  # weighted sum: x^T @ weights, then drop the trailing singleton axis
  x = tf.squeeze(tf.matmul(x, align, transpose_a=True), -1)

  return x

In [0]:
def model_fn(features, labels, mode, params):
  """Estimator `model_fn` for classifying a pair of token sequences.

  Pipeline: vocabulary lookup -> embedding -> shared Bi-LSTM -> soft-alignment
  attention between the two texts -> comparison features -> second Bi-LSTM ->
  attention pooling + max pooling -> dense classifier.

  Args:
    features: either a dict with keys 'text1' and 'text2', or a pair
      `(text1, text2)`. Both are tensors of tokens that are looked up in the
      vocabulary file -- assumed string tensors of shape (batch, len). Ids
      equal to 0 after the lookup are treated as padding (presumably the first
      vocabulary line is the pad token -- TODO confirm).
    labels: integer class ids used by the sparse cross-entropy loss; unused in
      PREDICT mode.
    mode: a `tf.estimator.ModeKeys` value.
    params: dict; the keys read here are 'vocab_path', 'embedding_path',
      'hidden_dim', 'num_labels', and -- in TRAIN mode -- 'clip_norm' and 'lr'.

  Returns:
    A `tf.estimator.EstimatorSpec` for PREDICT (arg-max class ids), TRAIN
    (loss, train op, learning-rate logging hook) or EVAL (loss and accuracy).
  """
  # Flag for Dropout / Batch Norm
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)


  # Receive inputs
  if isinstance(features, dict):
    text1 = features['text1']
    text2 = features['text2']
  else:
    text1, text2 = features
  batch_sz = tf.shape(text1)[0]


  # Word Indexing
  vocab = tf.contrib.lookup.index_table_from_file(
        params['vocab_path'], num_oov_buckets=1)

  text1 = vocab.lookup(text1)
  text2 = vocab.lookup(text2)


  # For Masking: every non-zero id counts as a real token
  seq_len1 = tf.count_nonzero(text1, 1, dtype=tf.int32)
  seq_len2 = tf.count_nonzero(text2, 1, dtype=tf.int32)

  mask1 = tf.sign(text1)
  mask2 = tf.sign(text2)


  # Embedding
  embedding = np.load(params['embedding_path'])
  # Remember the embedding width so the dropout below does not silently rely
  # on it being equal to params['hidden_dim'].
  embed_dim = embedding.shape[-1]
  embedding = tf.Variable(embedding, name='embedding', dtype=tf.float32)
  x1 = tf.nn.embedding_lookup(embedding, text1)
  x2 = tf.nn.embedding_lookup(embedding, text2)


  # Encoding
  # noise_shape has a 1 on the time axis: the same dropout mask is shared by
  # all time steps of an example.
  dropout = tf.layers.Dropout(0.2,
                              noise_shape=(batch_sz, 1, embed_dim))
  x1 = dropout(x1, training=is_training)
  x2 = dropout(x2, training=is_training)

  x1, x2 = bi_lstm(x1, x2, seq_len1, seq_len2)


  # Interaction / Comparison
  x1_, x2_ = soft_align_attention(x1, x2, mask1, mask2)
  fn = lambda x, x_: tf.concat((x,
                                x_,
                                (x - x_),
                                (x * x_),), -1)
  x1 = fn(x1, x1_)
  x2 = fn(x2, x2_)

  # 8 * hidden_dim = 4 concatenated views of the 2 * hidden_dim Bi-LSTM output
  dropout = tf.layers.Dropout(0.5,
                              noise_shape=(batch_sz, 1, 8*params['hidden_dim']))
  x1 = dropout(x1, training=is_training)
  x2 = dropout(x2, training=is_training)

  # One Dense layer applied to both sides, i.e. shared weights
  fully_connected = tf.layers.Dense(params['hidden_dim'], tf.nn.elu)
  x1 = fully_connected(x1)
  x2 = fully_connected(x2)


  # Encoding on top of Interaction
  dropout = tf.layers.Dropout(0.2,
                              noise_shape=(batch_sz, 1, params['hidden_dim']))
  x1 = dropout(x1, training=is_training)
  x2 = dropout(x2, training=is_training)

  x1, x2 = bi_lstm(x1, x2, seq_len1, seq_len2)


  # Extraction
  attn1 = tf.layers.Dense(params['hidden_dim'], tf.tanh, use_bias=False, name='attn_pool_1')
  attn2 = tf.layers.Dense(1, use_bias=False, name='attn_pool_2')
  dropout = tf.layers.Dropout(0.15)

  x = tf.concat((
      attention_pooling(x1, mask1, attn1, attn2, dropout, is_training),
      attention_pooling(x2, mask2, attn1, attn2, dropout, is_training),
      tf.reduce_max(x1, 1),
      tf.reduce_max(x2, 1),
  ), -1)


  # Fully Connected
  x = tf.layers.dropout(x, 0.5, training=is_training)

  x = tf.layers.dense(x, params['hidden_dim'], tf.nn.elu, name='fully_connected_1')

  x = tf.layers.dropout(x, 0.2, training=is_training)

  x = tf.layers.dense(x, params['hidden_dim'], tf.nn.elu, name='fully_connected_2')

  x = tf.layers.dropout(x, 0.2, training=is_training)

  logits = tf.layers.dense(x, params['num_labels'], name='output')


  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=tf.argmax(logits, -1))
  else:
    loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits)
    loss_op = tf.reduce_mean(loss_op)


  if mode == tf.estimator.ModeKeys.TRAIN:
    variables = tf.trainable_variables()
    # `pprint` comes from the notebook's import cell.
    tf.logging.info('\n'+pprint.pformat(variables))

    # Clip by global norm before applying the update
    grads = tf.gradients(loss_op, variables)
    grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])

    # Learning rate is multiplied by 0.25 per 100000 global steps
    global_step=tf.train.get_or_create_global_step()
    decay_lr = tf.train.exponential_decay(
        params['lr'], global_step, 100000, .25)

    optim = tf.train.AdamOptimizer(decay_lr)

    train_op = optim.apply_gradients(
      zip(grads, variables), global_step=global_step)

    hook = tf.train.LoggingTensorHook({'lr': decay_lr}, every_n_iter=100)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss_op,
                                      train_op=train_op,
                                      training_hooks=[hook],)


  if mode == tf.estimator.ModeKeys.EVAL:
    acc_op = tf.metrics.accuracy(labels=labels,
                                 predictions=tf.argmax(logits, -1))

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss_op,
                                      eval_metric_ops={'acc': acc_op})

In [0]:
# Single configuration dict shared by the cells below. All paths are relative
# to the current working directory.
params = {
    # --- output locations ---
    'model_dir': '../model/esim',            # passed to the Estimator as its model directory
    'export_dir': '../model/esim_export',    # base directory for the SavedModel export
    'log_path': '../log/esim.txt',           # not read by any cell visible in this notebook

    # --- data / vocabulary ---
    'train_path': '../data/train.txt',       # not read by any cell visible in this notebook
    'test_path': '../data/test.txt',         # not read by any cell visible in this notebook
    'embedding_path': '../vocab/word.npy',   # loaded with np.load inside model_fn
    'vocab_path': '../vocab/word.txt',       # backs the token -> id lookup table in model_fn

    # --- model / optimisation ---
    'batch_size': 32,
    'num_samples': 550152,
    'hidden_dim': 300,                       # LSTM cell size and dense layer width
    'lr': 4e-4,                              # initial learning rate before exponential decay
    'clip_norm': 10.0,                       # global-norm threshold for gradient clipping
    'num_labels': 3,                         # size of the output layer
    'num_patience': 7,                       # not read by any cell visible in this notebook
}

In [0]:
def serving_input_receiver_fn():
    """Describe the inputs accepted by the exported SavedModel.

    Two string placeholders named 'text1' and 'text2', each with two unknown
    dimensions, are created. The same dict is used both as the model features
    and as the receiver tensors, so requests are fed to the model unchanged.

    Returns:
        A `tf.estimator.export.ServingInputReceiver`.
    """
    features = {
        name: tf.placeholder(tf.string, [None, None], name)
        for name in ('text1', 'text2')
    }
    return tf.estimator.export.ServingInputReceiver(features, features)

In [0]:
# Build the estimator on top of the existing model directory.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=params['model_dir'],
    params=params,
)

# Export a SavedModel. The call is kept as the cell's last expression so the
# notebook displays the path of the export directory it returns.
estimator.export_saved_model(
    export_dir_base=params['export_dir'],
    serving_input_receiver_fn=serving_input_receiver_fn,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '../model/esim', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3f6c500fd0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:t

b'../model/esim_export/1548917780'

In [0]:
def parse_fn(text1, text2):
  """Turn two raw sentences into the feature dict expected by the predictor.

  Each sentence is split on whitespace and wrapped in an outer list, giving a
  batch of one example per key.

  Args:
    text1, text2: plain Python strings.

  Returns:
    {'text1': [[tokens...]], 'text2': [[tokens...]]}
  """
  def as_single_batch(sentence):
    return [sentence.split()]

  return {
      'text1': as_single_batch(text1),
      'text2': as_single_batch(text2),
  }


# Demo inputs and the mapping from predicted class id to a readable label.
TEXT1 = 'a man inspects the uniform of a figure in some east asian country'
TEXT2 = 'the man is sleeping'
idx2label = {0:'neutral', 1:'entailment', 2:'contradiction'}


# Locate the most recent export. Exports are written into sub-directories of
# `export_dir` (named by a number in the run shown in this notebook), so the
# last entry in sorted order is used. Only the directory *name* is tested for
# 'temp' (presumably temporary export folders -- TODO confirm), so a parent
# path that happens to contain "temp" no longer hides every export.
export_root = Path(params['export_dir'])
subdirs = [x for x in export_root.iterdir()
           if x.is_dir() and 'temp' not in x.name]
if not subdirs:
  # Fail with a clear message instead of an IndexError on the empty list.
  raise FileNotFoundError(
      'No exported SavedModel found under ' + str(export_root))
latest = str(sorted(subdirs)[-1])


# Load the SavedModel as a Python callable and run one example through it.
predict_fn = tf.contrib.predictor.from_saved_model(latest)
predictions = predict_fn(parse_fn(TEXT1, TEXT2))


print('Input 1:', TEXT1)
print('Input 2:', TEXT2)
# 'output' holds one predicted class id per example; the batch has one example.
print('Output:', idx2label[predictions['output'][0]])

INFO:tensorflow:Restoring parameters from ../model/esim_export/1548917780/variables/variables
Input 1: a man inspects the uniform of a figure in some east asian country
Input 2: the man is sleeping
Output: contradiction
