In [1]:
# Colab-only bootstrap: mount Google Drive, then make the project's `main`
# folder the working directory so the relative '../data' and '../vocab'
# paths used further down resolve.
import os

from google.colab import drive

drive.mount('/content/gdrive')
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/spoken_language_understanding/atis/main')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Colab line magic; presumably selects the TensorFlow 2.x runtime (the next
# cell prints the version that was actually loaded).
%tensorflow_version 2.x
# TensorFlow Addons is imported below as `tfa` (CRF ops and the cyclical
# learning-rate schedule).
# NOTE(review): the install is unpinned; consider pinning a release that
# matches the TensorFlow version in use so the notebook stays reproducible.
!pip install tensorflow-addons



In [3]:
from sklearn.metrics import classification_report, f1_score, accuracy_score

import tensorflow as tf
import tensorflow_addons as tfa
import pprint
import logging
import time
import numpy as np

# Report the runtime so the logs below can be tied to a TensorFlow build.
print("TensorFlow Version", tf.__version__)
# `tf.test.is_gpu_available()` is deprecated; listing the physical GPU devices
# is the replacement that the deprecation warning itself recommends.
print('GPU Enabled:', bool(tf.config.list_physical_devices('GPU')))

TensorFlow Version 2.2.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU Enabled: True


In [0]:
def get_vocab(vocab_path):
  """Build a token -> index lookup from a one-token-per-line vocabulary file.

  Args:
    vocab_path: path of a text file holding one vocabulary entry per line.

  Returns:
    A dict mapping each line (trailing whitespace removed) to its zero-based
    line number. If an entry occurs more than once, the last occurrence wins.
  """
  with open(vocab_path) as vocab_file:
    return {entry.rstrip(): row for row, entry in enumerate(vocab_file)}

In [0]:
def data_generator(f_path, params):
  """Yield one integer-encoded example per non-blank line of an ATIS file.

  Each line is expected to hold two TAB-separated fields: the utterance and
  the tag sequence. The first and last utterance tokens are dropped, as is
  the first tag; the last tag is the intent and the remaining tags are the
  per-word slots.

  Args:
    f_path: path of the data file to read.
    params: dict providing the 'word2idx', 'intent2idx' and 'slot2idx'
      lookups. A token missing from a lookup is encoded as ``len(lookup)``.

  Yields:
    ``(word_ids, (intent_id, slot_ids))`` where ``word_ids`` and ``slot_ids``
    are equal-length lists of ints and ``intent_id`` is an int.

  Raises:
    ValueError: if a non-blank line does not contain exactly one TAB.
    AssertionError: if a line has a different number of words and slots.
  """
  print('Reading', f_path)
  word2idx = params['word2idx']
  intent2idx = params['intent2idx']
  slot2idx = params['slot2idx']
  # Out-of-vocabulary ids sit one past the last known id of each lookup.
  unk_word, unk_intent, unk_slot = len(word2idx), len(intent2idx), len(slot2idx)

  with open(f_path) as f:
    for line_no, line in enumerate(f, 1):
      line = line.rstrip()
      if not line:
        # A blank line (typically a trailing newline at the end of the file)
        # carries no example; unpacking its split would raise ValueError.
        continue
      text, slot_intent = line.split('\t')
      words = text.split()[1:-1]
      slot_intent = slot_intent.split()
      slots, intent = slot_intent[1:-1], slot_intent[-1]
      assert len(words) == len(slots), (
        '{}:{}: {} words but {} slots'.format(f_path, line_no, len(words), len(slots)))

      words = [word2idx.get(w, unk_word) for w in words]
      intent = intent2idx.get(intent, unk_intent)
      slots = [slot2idx.get(s, unk_slot) for s in slots]

      yield (words, (intent, slots))

In [0]:
def dataset(is_training, params):
  """Build the batched ``tf.data`` pipeline for the train or the test file.

  Args:
    is_training: when truthy, read params['train_path'] and shuffle with a
      buffer of params['num_samples'] elements; otherwise read
      params['test_path'] in file order.
    params: dict with 'train_path' / 'test_path', 'batch_size', the
      vocabularies consumed by ``data_generator`` and, for training,
      'num_samples'.

  Returns:
    A dataset of ``(words, (intent, slots))`` int32 batches. ``words`` and
    ``slots`` are padded with 0 to the longest sequence of the batch (the -1
    pad value belongs to the scalar intent, which has nothing to pad).
  """
  path_key = 'train_path' if is_training else 'test_path'
  element_shapes = ([None], ((), [None]))
  element_types = (tf.int32, (tf.int32, tf.int32))
  pad_values = (0, (-1, 0))

  pipeline = tf.data.Dataset.from_generator(
    lambda: data_generator(params[path_key], params),
    output_types=element_types,
    output_shapes=element_shapes)
  if is_training:
    # Shuffling happens on single examples, before they are batched.
    pipeline = pipeline.shuffle(params['num_samples'])
  pipeline = pipeline.padded_batch(params['batch_size'], element_shapes, pad_values)
  return pipeline.prefetch(tf.data.experimental.AUTOTUNE)

In [0]:
class Model(tf.keras.Model):
  """Joint intent / slot network: embedding lookup -> dropout -> BiGRU -> two heads.

  The intent head consumes the max over time of the BiGRU outputs concatenated
  with the final forward and backward states; the slot head is a linear layer
  applied to every time step. `trans_params` is a square
  (slot_size x slot_size) weight that the model itself never applies; the
  training script passes it to the CRF functions as the transition matrix.
  """

  def __init__(self, params: dict):
    """Create the layers and weights.

    Args:
      params: configuration dict. Keys read here: 'vocab_path' (file loaded
        with `np.load` and used as the initial embedding matrix),
        'dropout_rate', 'rnn_units', 'intent_size' and 'slot_size'.
    """
    super().__init__()
    # The embedding is a plain trainable variable initialised from the .npy file.
    self.embedding = tf.Variable(np.load(params['vocab_path']),
                                 dtype=tf.float32,
                                 name='pretrained_embedding')
    self.input_dropout = tf.keras.layers.Dropout(params['dropout_rate'])
    
    # return_sequences gives the per-step outputs for the slot head;
    # return_state additionally returns the final state of each direction.
    self.bidir_gru = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(
        params['rnn_units'], return_state=True, return_sequences=True, zero_output_for_mask=True))
    
    self.intent_dropout = tf.keras.layers.Dropout(params['dropout_rate'])
    self.fc_intent = tf.keras.layers.Dense(params['rnn_units'], tf.nn.elu, name='fc_intent')
    self.out_linear_intent = tf.keras.layers.Dense(params['intent_size'], name='output_intent')
    self.out_linear_slot = tf.keras.layers.Dense(params['slot_size'], name='output_slot')

    # CRF transition weights, consumed outside the model by the CRF loss / decoder.
    self.trans_params = self.add_weight('transitions', (params['slot_size'], params['slot_size']))
  
  
  def call(self, inputs, training=False):
    """Compute the intent and slot outputs for a batch of word ids.

    Args:
      inputs: tensor of word ids; cast to int32 when it has another dtype.
        Positions whose id is 0 are masked out of the recurrent layer.
      training: enables both dropout layers when True.

    Returns:
      `(x_intent, x_slot)`: the output of the intent head (one vector per
      example) and of the slot head (one vector per time step). The training
      script uses them as logits and as CRF potentials respectively.
    """
    # The training script calls `model.build` with only a shape; the cast keeps
    # the embedding lookup valid for any non-int32 input.
    if inputs.dtype != tf.int32:
      inputs = tf.cast(inputs, tf.int32)
    # sign() is 0 exactly where the id is 0, so the mask is False on those steps
    # (assumes ids are never negative - TODO confirm).
    mask = tf.sign(inputs)
    rnn_mask = tf.cast(mask, tf.bool)
    
    x = tf.nn.embedding_lookup(self.embedding, inputs)
    x = self.input_dropout(x, training=training)
    x, s_fw, s_bw = self.bidir_gru(x, mask=rnn_mask)
    
    # NOTE(review): with zero_output_for_mask=True the masked steps presumably
    # hold zeros, so this max is floored at 0 for padded examples - confirm
    # that this is intended.
    x_intent = tf.concat([tf.reduce_max(x, 1), s_fw, s_bw], -1)
    x_intent = self.intent_dropout(x_intent, training=training)
    x_intent = self.out_linear_intent(self.fc_intent(x_intent))
    x_slot = self.out_linear_slot(x)
    return (x_intent, x_slot)

In [0]:
# Single configuration dict shared by the data pipeline, the model and the
# training loop.
params = dict(
  # Data files, relative to the working directory.
  train_path='../data/atis.train.w-intent.iob',
  test_path='../data/atis.test.w-intent.iob',
  # Vocabulary files; 'vocab_path' is the .npy array loaded as the embedding.
  word_path='../vocab/word.txt',
  vocab_path='../vocab/word.npy',
  intent_path='../vocab/intent.txt',
  slot_path='../vocab/slot.txt',
  # Batching; 'num_samples' sizes the shuffle buffer and the LR schedule.
  batch_size=16,
  num_samples=4978,
  # Model hyper-parameters ('num_heads' and 'multiplier' are not referenced
  # by the cells shown in this notebook).
  rnn_units=300,
  num_heads=6,
  multiplier=2,
  dropout_rate=.2,
  # Threshold handed to the global-norm gradient clipping.
  clip_norm=.1,
)

In [0]:
# Load the three token -> id lookups from their vocabulary files.
params.update(
  word2idx=get_vocab(params['word_path']),
  intent2idx=get_vocab(params['intent_path']),
  slot2idx=get_vocab(params['slot_path']),
)

# Every size is the vocabulary length plus one, leaving room for the extra id
# that data_generator assigns to tokens missing from the vocabulary.
params.update(
  word_size=len(params['word2idx']) + 1,
  intent_size=len(params['intent2idx']) + 1,
  slot_size=len(params['slot2idx']) + 1,
)

In [10]:
# Train the joint intent / slot model for 64 epochs, evaluating on the test
# file after every epoch and tracking the best slot F1 seen so far.
model = Model(params)
# Building with a (batch, time) shape creates the variables up front so they
# can be listed before the first training step.
model.build(input_shape=(None, None))
pprint.pprint([(v.name, v.shape) for v in model.trainable_variables])

# Cyclical learning rate between 1e-4 and 8e-4. step_size is measured in
# optimizer steps: (8 * num_samples) // batch_size, i.e. about eight passes
# over the data if 'num_samples' is the training-set size.
decay_lr = tfa.optimizers.Triangular2CyclicalLearningRate(
  initial_learning_rate = 1e-4,
  maximal_learning_rate = 8e-4,
  step_size = 8 * params['num_samples'] // params['batch_size'],
)
# The constant passed here is only a starting value: the loop overwrites
# `optim.lr` from the schedule before every update.
optim = tf.optimizers.Adam(1e-4)
global_step = 0

slot_best_f1 = .0
intent_acc_with_that = .0

t0 = time.time()
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

for n_epoch in range(1, 64+1):
  # TRAINING
  for (words, (intent, slots)) in dataset(is_training=True, params=params):
    with tf.GradientTape() as tape:
      y_intent, y_slots = model(words, training=True)

      # Intent loss: softmax cross-entropy with label smoothing. The one-hot
      # depth is len(intent2idx) + 1, the same value as params['intent_size'].
      loss_intent = tf.compat.v1.losses.softmax_cross_entropy(
        onehot_labels = tf.one_hot(intent, len(params['intent2idx'])+1),
        logits = y_intent,
        label_smoothing = .2)
      
      # Slot loss: negative CRF log-likelihood. Sequence lengths are the number
      # of non-zero word ids per row, since the dataset pads words with 0.
      log_likelihood, _ = tfa.text.crf_log_likelihood(
          inputs = y_slots,
          tag_indices = slots,
          sequence_lengths = tf.math.count_nonzero(words, axis=1, dtype=tf.int32),
          transition_params = model.trans_params,)
      loss_slots = - tf.reduce_mean(log_likelihood)

      # The intent term is weighted four times as much as the slot term.
      loss = 4. * loss_intent + loss_slots
    
    train_vars = model.trainable_variables
    # Set this step's learning rate from the schedule.
    optim.lr.assign(decay_lr(global_step))
    grads = tape.gradient(loss, train_vars)
    # Rescale the gradients so their global norm does not exceed 'clip_norm'.
    grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
    optim.apply_gradients(zip(grads, train_vars))

    # Log every 50 steps; "Spent" is the wall time since the previous log line
    # (t0 is not reset around evaluation, so that time is included too).
    if global_step % 50 == 0:
      logger.info("Step {} | Loss: {:.4f} | Loss_intent: {:.4f} | Loss_slots: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
          global_step, loss.numpy().item(), loss_intent.numpy().item(), loss_slots.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
      t0 = time.time()
    global_step += 1
    
  # EVALUATION
  # Labels and predictions are collected as flat lists over the whole test file.
  intent_true = []
  intent_pred = []
  slot_true = []
  slot_pred = []

  for (words, (intent, slots)) in dataset(is_training=False, params=params):
    y_intent, y_slots = model(words, training=False)
    y_intent = tf.argmax(y_intent, -1)
    # Decode the slot tags with the CRF, using the learned transitions.
    y_slots, _ = tfa.text.crf_decode(
      potentials = y_slots,
      transition_params = model.trans_params,
      sequence_length = tf.math.count_nonzero(words, axis=1, dtype=tf.int32),)
    
    intent_true += intent.numpy().flatten().tolist()
    intent_pred += y_intent.numpy().flatten().tolist()
    slot_true += slots.numpy().flatten().tolist()
    slot_pred += y_slots.numpy().flatten().tolist()
    
  # Micro-averaged F1 over the ids in slot2idx (the extra out-of-vocabulary id
  # is not in `labels`). sample_weight is 0 wherever the true slot id is 0,
  # which removes the padded positions from the score.
  f1_slots = f1_score(y_true = slot_true,
                      y_pred = slot_pred,
                      labels = list(params['slot2idx'].values()),
                      sample_weight = np.sign(slot_true),
                      average='micro',)
  
  acc_intent = accuracy_score(intent_true, intent_pred)

  logger.info("Slot F1: {:.3f}, Intent Acc: {:.3f}".format(f1_slots, acc_intent))

  # Full per-class reports every eighth epoch (the `n_epoch != 1` test is
  # redundant, since 1 % 8 is never 0).
  if n_epoch != 1 and n_epoch % 8 == 0:
    logger.info('\n'+classification_report(y_true = intent_true,
                                          y_pred = intent_pred,
                                          labels = list(params['intent2idx'].values()),
                                          target_names = list(params['intent2idx'].keys()),
                                          digits=3))
    logger.info('\n'+classification_report(y_true = slot_true,
                                          y_pred = slot_pred,
                                          labels = list(params['slot2idx'].values()),
                                          target_names = list(params['slot2idx'].keys()),
                                          sample_weight = np.sign(slot_true),
                                          digits=3))
  
  # Keep the best slot F1 and the intent accuracy of that same epoch.
  # NOTE(review): the best epoch is picked on the file at params['test_path'];
  # if that is the held-out test split, the reported best is optimistic.
  if f1_slots > slot_best_f1:
    slot_best_f1 = f1_slots
    intent_acc_with_that = acc_intent
    # you can save model here
  logger.info("Best Slot F1: {:.3f}, Intent Acc: {:.3f}".format(slot_best_f1, intent_acc_with_that))

[('bidirectional/forward_gru/gru_cell_1/kernel:0', TensorShape([300, 900])),
 ('bidirectional/forward_gru/gru_cell_1/recurrent_kernel:0',
  TensorShape([300, 900])),
 ('bidirectional/forward_gru/gru_cell_1/bias:0', TensorShape([2, 900])),
 ('bidirectional/backward_gru/gru_cell_2/kernel:0', TensorShape([300, 900])),
 ('bidirectional/backward_gru/gru_cell_2/recurrent_kernel:0',
  TensorShape([300, 900])),
 ('bidirectional/backward_gru/gru_cell_2/bias:0', TensorShape([2, 900])),
 ('fc_intent/kernel:0', TensorShape([1200, 300])),
 ('fc_intent/bias:0', TensorShape([300])),
 ('output_intent/kernel:0', TensorShape([300, 23])),
 ('output_intent/bias:0', TensorShape([23])),
 ('output_slot/kernel:0', TensorShape([600, 122])),
 ('output_slot/bias:0', TensorShape([122])),
 ('pretrained_embedding:0', TensorShape([750, 300])),
 ('transitions:0', TensorShape([122, 122]))]
Reading ../data/atis.train.w-intent.iob
Instructions for updating:
Use tf.identity instead.
INFO:tensorflow:Step 0 | Loss: 73.1080

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Step 2500 | Loss: 4.4316 | Loss_intent: 1.1019 | Loss_slots: 0.0239 | Spent: 11.6 secs | LR: 0.000797
INFO:tensorflow:Step 2550 | Loss: 4.4803 | Loss_intent: 1.0990 | Loss_slots: 0.0844 | Spent: 5.6 secs | LR: 0.000783
INFO:tensorflow:Step 2600 | Loss: 4.3988 | Loss_intent: 1.0924 | Loss_slots: 0.0293 | Spent: 5.7 secs | LR: 0.000769
INFO:tensorflow:Step 2650 | Loss: 4.4693 | Loss_intent: 1.1046 | Loss_slots: 0.0507 | Spent: 5.8 secs | LR: 0.000755
INFO:tensorflow:Step 2700 | Loss: 4.4692 | Loss_intent: 1.1108 | Loss_slots: 0.0260 | Spent: 5.3 secs | LR: 0.000741
INFO:tensorflow:Step 2750 | Loss: 4.4421 | Loss_intent: 1.1092 | Loss_slots: 0.0055 | Spent: 5.9 secs | LR: 0.000727
INFO:tensorflow:Step 2800 | Loss: 4.6282 | Loss_intent: 1.1163 | Loss_slots: 0.1628 | Spent: 5.5 secs | LR: 0.000713
Reading ../data/atis.test.w-intent.iob
INFO:tensorflow:Slot F1: 0.953, Intent Acc: 0.964
INFO:tensorflow:Best Slot F1: 0.955, Intent Acc: 0.953
Reading ../data/atis.train.w-intent.