In [1]:
# Mount Google Drive into the Colab runtime so the project files are reachable.
from google.colab import drive
drive.mount('/content/gdrive')
# Switch the working directory to the project's `main` folder; the relative
# paths configured in `params` ('../data/...', '../vocab/...') are resolved
# against it.
import os
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/text_matching/ant/main')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Colab line magic: select the TensorFlow 2.x runtime.
%tensorflow_version 2.x
# tensorflow-addons is imported below as `tfa`; it supplies the cyclical
# learning-rate schedule used by the training cell.
!pip install tensorflow-addons



In [3]:
from sklearn.metrics import classification_report

import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
import pprint
import logging
import time
import math
import json

# Report the TensorFlow build and whether a GPU is visible to it.
print("TensorFlow Version", tf.__version__)
# `tf.test.is_gpu_available()` is deprecated; TensorFlow itself recommends
# `tf.config.list_physical_devices('GPU')`, which returns a (possibly empty) list.
print('GPU Enabled:', len(tf.config.list_physical_devices('GPU')) > 0)

TensorFlow Version 2.3.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU Enabled: True


In [4]:
def get_vocab(f_path):
  """Build a token -> index lookup table from a vocabulary file.

  Each line of the file holds one token. A token's index is the zero-based
  number of the line it appears on. Trailing whitespace (including the
  newline) is stripped from every line. If the same token occurs on several
  lines, the last occurrence wins.

  Args:
    f_path: path to the vocabulary text file; it is decoded as UTF-8.

  Returns:
    dict mapping each token string to its integer line index.
  """
  # Decode explicitly as UTF-8 so the mapping does not depend on the
  # platform's default locale encoding.
  with open(f_path, encoding='utf-8') as f:
    return {line.rstrip(): i for i, line in enumerate(f)}

In [5]:
# stream data from text files
def data_generator(f_path, char2idx):
  """Stream character-id encoded sentence pairs from a JSON-lines file.

  Every non-blank line must be a JSON object with the keys 'sentence1',
  'sentence2' and 'label'. Both sentences are split into characters and each
  character is replaced by its id from `char2idx`; characters missing from
  the table all map to the single extra id `len(char2idx)`.

  Args:
    f_path: path to the JSON-lines file; it is decoded as UTF-8.
    char2idx: dict mapping a character to its integer id.

  Yields:
    ((text1, text2), label) where text1 / text2 are lists of int ids and
    label is `int(record['label'])`.
  """
  # One shared id for every out-of-vocabulary character.
  unk_id = len(char2idx)
  # Explicit UTF-8: the default encoding is locale-dependent and the
  # sentences are not ASCII.
  with open(f_path, encoding='utf-8') as f:
    print('Reading', f_path)
    for line in f:
      line = line.strip()
      if not line:
        # Tolerate blank lines (e.g. a trailing newline at end of file)
        # instead of failing inside json.loads.
        continue
      record = json.loads(line)
      text1 = [char2idx.get(c, unk_id) for c in record['sentence1']]
      text2 = [char2idx.get(c, unk_id) for c in record['sentence2']]
      yield ((text1, text2), int(record['label']))


def dataset(is_training, params):
  """Create the batched tf.data pipeline for training or evaluation.

  Args:
    is_training: when truthy, read params['train_path'] and shuffle with a
      buffer of params['buffer_size'] examples; otherwise read
      params['test_path'] in file order.
    params: configuration dict; 'train_path', 'test_path', 'char2idx',
      'buffer_size' and 'batch_size' are read here.

  Returns:
    A tf.data.Dataset yielding ((text1, text2), label) batches. The id
    sequences of each batch are padded with 0 to the longest sequence in
    that batch.
  """
  element_shapes = (([None], [None]), ())
  element_types = ((tf.int32, tf.int32), tf.int32)
  pad_values = ((0, 0), -1)

  # The two modes differ only in the source file and in whether to shuffle.
  path_key = 'train_path' if is_training else 'test_path'
  pipeline = tf.data.Dataset.from_generator(
    lambda: data_generator(params[path_key], params['char2idx']),
    output_types=element_types,
    output_shapes=element_shapes)

  if is_training:
    pipeline = pipeline.shuffle(params['buffer_size'])

  pipeline = pipeline.padded_batch(params['batch_size'], element_shapes, pad_values)
  return pipeline.prefetch(tf.data.experimental.AUTOTUNE)

In [6]:
class FFNBlock(tf.keras.Model):
  """Feed-forward head: two consecutive (dropout -> dense) stages.

  Both dense layers have params['hidden_units'] outputs and use
  params['activation']; both dropouts use params['dropout_rate'].
  """

  def __init__(self, params, name):
    super().__init__(name=name)
    rate = params['dropout_rate']
    units = params['hidden_units']
    act = params['activation']
    # Layers are created in the order in which `call` applies them.
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.fc1 = tf.keras.layers.Dense(units, activation=act)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    self.fc2 = tf.keras.layers.Dense(units, activation=act)

  def call(self, inputs, training=False):
    """Apply dropout1 -> fc1 -> dropout2 -> fc2; dropout is active only when `training`."""
    hidden = self.fc1(self.dropout1(inputs, training=training))
    return self.fc2(self.dropout2(hidden, training=training))

In [7]:
class Pyramid(tf.keras.Model):
  """Sentence-pair matcher built from a shared BiLSTM encoder, four
  position-by-position interaction maps and a small CNN over those maps.

  `call` takes a pair of integer id matrices (id 0 is treated as padding) and
  returns one unnormalised logit per pair.

  `params` keys read by this class: 'embedding_path', 'dropout_rate',
  'hidden_units', 'activation', 'fixed_len1', 'fixed_len2'.
  """

  def __init__(self, params: dict):
    super().__init__()
    # Side lengths the interaction map is pooled down to. They are stored on
    # the instance so that `call` does not depend on a notebook-global `params`.
    self.fixed_len1 = params['fixed_len1']
    self.fixed_len2 = params['fixed_len2']

    # Pretrained embedding matrix loaded from disk; as a tf.Variable attribute
    # it is tracked by the model.
    self.embedding = tf.Variable(np.load(params['embedding_path']), name='pretrained_embedding', dtype=tf.float32)
    
    self.inp_dropout = tf.keras.layers.Dropout(params['dropout_rate'])
    
    # One encoder shared by both sentences.
    self.encoder = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
      params['hidden_units'], return_sequences=True, zero_output_for_mask=True), name='encoder')
    
    self.conv_1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation=params['activation'], padding='same')
    
    self.conv_2 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation=params['activation'], padding='same')
    
    self.conv_3 = tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation=params['activation'], padding='same')
    
    # Projection used by the bilinear interaction.
    self.W_0 = tf.keras.layers.Dense(2*params['hidden_units'], use_bias=False)
    
    # Projections and scoring vector used by the additive interaction.
    self.W_1_1 = tf.keras.layers.Dense(params['hidden_units'], use_bias=False)
    
    self.W_1_2 = tf.keras.layers.Dense(params['hidden_units'], use_bias=False)
    
    self.v_1 = tf.keras.layers.Dense(1, use_bias=False)
    
    # Projection and scoring vector used by the |a - b| interaction.
    self.W_2 = tf.keras.layers.Dense(params['hidden_units'], use_bias=False)
    
    self.v_2 = tf.keras.layers.Dense(1, use_bias=False)
    
    # Projection and scoring vector used by the element-wise product interaction.
    self.W_3 = tf.keras.layers.Dense(params['hidden_units'], use_bias=False)
    
    self.v_3 = tf.keras.layers.Dense(1, use_bias=False)
    
    self.flatten = tf.keras.layers.Flatten()
    
    self.out_hidden = FFNBlock(params, name='out_hidden')
    
    self.out_linear = tf.keras.layers.Dense(1, name='out_linear')

  
  def get_stride(self, x, fixed_len):
    """Pick a pooling stride (zero-padding `x` if necessary) so that 'VALID'
    pooling with window == stride leaves exactly `fixed_len` positions.

    Args:
      x: int32 id tensor of shape (batch, seq_len). The sequence length is
        read from the static shape, so it must be known when this runs.
      fixed_len: number of positions wanted after pooling.

    Returns:
      (x, stride): `x`, possibly right-padded with zeros along axis 1, and
      the integer stride for which `x.shape[1] // stride == fixed_len`.
    """
    batch_sz = tf.shape(x)[0]
    seq_len = x.shape[1]
    stride = seq_len // fixed_len
    # stride == 0 means the sequence is shorter than fixed_len. It has to be
    # checked first, because `seq_len // 0` would raise ZeroDivisionError;
    # the padding below then extends the sequence to exactly fixed_len.
    if stride == 0 or seq_len // stride != fixed_len:
      # Pad up to the next multiple of fixed_len.
      remin = (stride + 1) * fixed_len - seq_len
      zeros = tf.zeros([batch_sz, remin], tf.int32)
      x = tf.concat([x, zeros], 1)
      seq_len = x.shape[1]
      stride = seq_len // fixed_len
    return x, stride
  
  
  def call(self, inputs, training=False):
    """Score a batch of sentence pairs.

    Args:
      inputs: pair (x1, x2) of id tensors shaped (batch, len1) and
        (batch, len2); id 0 marks padding. Non-int32 inputs are cast to int32.
      training: enables dropout when True.

    Returns:
      Tensor of shape (batch,) holding one logit per pair.
    """
    x1, x2 = inputs
    
    if x1.dtype != tf.int32:
      x1 = tf.cast(x1, tf.int32)
    if x2.dtype != tf.int32:
      x2 = tf.cast(x2, tf.int32)
    
    x1, stride1 = self.get_stride(x1, self.fixed_len1)
    x2, stride2 = self.get_stride(x2, self.fixed_len2)
    
    # 1 for real ids, 0 for padding (ids are non-negative, padding id is 0).
    mask1 = tf.sign(x1)
    mask2 = tf.sign(x2)
    
    x1 = tf.nn.embedding_lookup(self.embedding, x1)
    x2 = tf.nn.embedding_lookup(self.embedding, x2)
    
    x1 = self.inp_dropout(x1, training=training)
    x2 = self.inp_dropout(x2, training=training)
    
    x1 = self.encoder(x1, mask=tf.cast(mask1, tf.bool))
    x2 = self.encoder(x2, mask=tf.cast(mask2, tf.bool))
    # Collects the interaction maps, each shaped (batch, len1, len2, 1).
    x = []
    
    # attention 1 (bilinear)
    a = tf.matmul(x1, self.W_0(x2), transpose_b=True)
    x.append(tf.expand_dims(a, -1))
    
    # attention 2 (add): broadcasting a1 + a2 pairs every position of x1 with
    # every position of x2
    a1 = tf.expand_dims(self.W_1_1(x1), 2)
    a2 = tf.expand_dims(self.W_1_2(x2), 1)
    x.append(self.v_1(tf.tanh(a1 + a2)))
    
    # attention 3 (minus)
    a1 = tf.expand_dims(x1, 2)
    a2 = tf.expand_dims(x2, 1)
    x.append(self.v_2(tf.tanh(self.W_2(tf.abs(a1 - a2)))))
    
    # attention 4 (dot)
    a1 = tf.expand_dims(x1, 2)
    a2 = tf.expand_dims(x2, 1)
    x.append(self.v_3(tf.tanh(self.W_3(a1 * a2))))
    
    # Stack the four maps as channels of one "image".
    x = tf.concat(x, -1)
    x = self.conv_1(x)
    # The strides chosen by get_stride shrink the map to fixed_len1 x fixed_len2.
    x = tf.nn.max_pool(x, [1, stride1, stride2, 1], [1, stride1, stride2, 1], 'VALID')
    x = self.conv_2(x)
    x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
    x = self.conv_3(x)
    x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
    
    x = self.flatten(x)
    x = self.out_hidden(x, training=training)
    x = self.out_linear(x)
    # (batch, 1) -> (batch,)
    x = tf.squeeze(x, 1)
    
    return x

In [8]:
params = {
  # --- files (relative to the working directory) ---
  'train_path': '../data/train.json',
  'test_path': '../data/dev.json',
  'vocab_path': '../vocab/char.txt',
  'embedding_path': '../vocab/char.npy',

  # --- input pipeline ---
  'batch_size': 32,
  # shuffle buffer size; the training cell also uses it to size the LR cycle
  'buffer_size': 34334,

  # --- model ---
  'dropout_rate': 0.2,
  'hidden_units': 300,
  # side lengths the interaction map is pooled down to (sentence 1 / sentence 2)
  'fixed_len1': 12,
  'fixed_len2': 12,
  'activation': tf.nn.swish,

  # --- optimisation ---
  'init_lr': 1e-4,
  'max_lr': 8e-4,
  # 0 leaves the labels unsmoothed
  'label_smooth': 0.0,
  # threshold for global-norm gradient clipping
  'clip_norm': 0.1,
  # evaluations without a new best accuracy before training stops
  'num_patience': 10,
}

In [9]:
def label_smoothing(label, smooth):
  """Pull binary targets towards 0.5.

  Returns `label` unchanged unless `smooth` is strictly positive; otherwise
  returns `label * (1 - smooth) + 0.5 * smooth`.
  """
  if not smooth > 0.:
    return label
  return label * (1 - smooth) + 0.5 * smooth

# Load the char -> id table. The vocabulary size counts one extra id, the one
# data_generator assigns to characters that are missing from the table.
params['char2idx'] = get_vocab(params['vocab_path'])
params['vocab_size'] = 1 + len(params['char2idx'])

# input stream ids check: show the encoded first training pair
(text1, text2), _ = next(data_generator(params['train_path'], params['char2idx']))
print(text1, text2, sep='\n')

Reading ../data/train.json
[26, 25, 5, 1, 416, 20, 4, 7, 21, 22, 99, 128, 301, 87, 51, 246, 15]
[5, 1, 23, 301, 87, 38, 19, 4, 246, 15]


In [10]:
# Instantiate the network and create its weights so they can be listed.
# The sequence length 13 is only used to trace shapes (presumably arbitrary).
model = Pyramid(params)
model.build([[None, 13], [None, 13]])
pprint.pprint([(var.name, var.shape) for var in model.trainable_variables])

# Cyclical learning-rate schedule between init_lr and max_lr.
decay_lr = tfa.optimizers.Triangular2CyclicalLearningRate(
  initial_learning_rate=params['init_lr'],
  maximal_learning_rate=params['max_lr'],
  step_size=8 * params['buffer_size'] // params['batch_size'])
# Adam starts at init_lr; the training loop overwrites `optim.lr` from the
# schedule before every update.
optim = tf.optimizers.Adam(params['init_lr'])

# Bookkeeping for the training loop: step counter, best dev accuracy so far,
# and the number of consecutive evaluations without improvement.
global_step = 0
best_acc = 0.0
count = 0

t0 = time.time()
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

# Each iteration of this loop is one full pass over the training file followed
# by one full evaluation on params['test_path']; the only exit is the
# early-stopping `break` at the bottom.
while True:
  # TRAINING
  # dataset() is rebuilt every pass, so the file is re-read and re-shuffled.
  for ((text1, text2), labels) in dataset(is_training=True, params=params):
    with tf.GradientTape() as tape:
      logits = model((text1, text2), training=True)
      labels = tf.cast(labels, tf.float32)
      # Per-batch class balancing: positives are weighted by the
      # negative/positive ratio of the current batch. `.numpy()` means this
      # loop relies on eager execution.
      num_neg = tf.reduce_sum(tf.cast(tf.equal(labels, 0.), tf.float32)).numpy()
      num_pos = tf.reduce_sum(labels).numpy()
      if num_pos == 0.:
        # No positives in the batch: avoid dividing by zero.
        pos_weight = 1.
      else:
        pos_weight = num_neg / num_pos
      loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(
        labels = label_smoothing(labels, params['label_smooth']),
        logits = logits,
        pos_weight = pos_weight))
    
    # Set this step's learning rate from the cyclical schedule.
    optim.lr.assign(decay_lr(global_step))
    grads = tape.gradient(loss, model.trainable_variables)
    # Clip gradients by their global norm before applying them.
    grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
    optim.apply_gradients(zip(grads, model.trainable_variables))
    
    # Log every 100 steps. "Spent" is the wall time since the previous log
    # line, because t0 is reset only here.
    if global_step % 100 == 0:
      logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
        global_step, loss.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
      t0 = time.time()
    global_step += 1
  
  # EVALUATION
  # A fresh metric per evaluation; the true/predicted labels are also
  # collected for the classification report.
  m = tf.keras.metrics.Accuracy()
  intent_true = []
  intent_pred = []

  for ((text1, text2), labels) in dataset(is_training=False, params=params):
    # NOTE: despite its name, `logits` holds sigmoid probabilities here.
    logits = tf.sigmoid(model((text1, text2), training=False))
    # Predict class 1 when the probability is at least 0.5.
    y_pred = tf.cast(tf.math.greater_equal(logits, .5), tf.int32)
    m.update_state(y_true=labels, y_pred=y_pred)
    intent_true += labels.numpy().flatten().tolist()
    intent_pred += y_pred.numpy().flatten().tolist()

  acc = m.result().numpy()
  logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))

  logger.info('\n'+classification_report(y_true = intent_true,
                                         y_pred = intent_pred,
                                         labels = [0, 1],
                                         target_names = ['Not Matched', 'Matched'],
                                         digits = 3))

  # Early stopping: `count` is the number of consecutive evaluations that did
  # not beat the best accuracy seen so far.
  if acc > best_acc:
    best_acc = acc
    # you can save model here
    count = 0
  else:
    count += 1
  logger.info("Best Accuracy: {:.3f}".format(best_acc))

  # Stop after params['num_patience'] evaluations in a row without improvement.
  if count == params['num_patience']:
    print(params['num_patience'], "times not improve the best result, therefore stop training")
    break

[('encoder/forward_lstm/lstm_cell_1/kernel:0', TensorShape([300, 1200])),
 ('encoder/forward_lstm/lstm_cell_1/recurrent_kernel:0',
  TensorShape([300, 1200])),
 ('encoder/forward_lstm/lstm_cell_1/bias:0', TensorShape([1200])),
 ('encoder/backward_lstm/lstm_cell_2/kernel:0', TensorShape([300, 1200])),
 ('encoder/backward_lstm/lstm_cell_2/recurrent_kernel:0',
  TensorShape([300, 1200])),
 ('encoder/backward_lstm/lstm_cell_2/bias:0', TensorShape([1200])),
 ('conv2d/kernel:0', TensorShape([3, 3, 4, 32])),
 ('conv2d/bias:0', TensorShape([32])),
 ('conv2d_1/kernel:0', TensorShape([3, 3, 32, 64])),
 ('conv2d_1/bias:0', TensorShape([64])),
 ('conv2d_2/kernel:0', TensorShape([3, 3, 64, 128])),
 ('conv2d_2/bias:0', TensorShape([128])),
 ('dense/kernel:0', TensorShape([600, 600])),
 ('dense_1/kernel:0', TensorShape([600, 300])),
 ('dense_2/kernel:0', TensorShape([600, 300])),
 ('dense_3/kernel:0', TensorShape([300, 1])),
 ('dense_4/kernel:0', TensorShape([600, 300])),
 ('dense_5/kernel:0', Tensor