In [1]:
# Colab-only setup: mount Google Drive so the project files stored there are reachable.
from google.colab import drive
drive.mount('/content/gdrive')
import os
# Switch into the project's `main` directory; the relative paths configured in
# `params` further down ('../data', '../vocab', '../model', '../log') are resolved from here.
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/spoken_language_understanding/atis/main')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [3]:
import tensorflow as tf
import tensorflow_hub as hub

import logging
import time
import numpy as np

from sklearn.metrics import classification_report, f1_score, accuracy_score
from pathlib import Path

# Record which TensorFlow build and device this run used.
print("TensorFlow Version {}".format(tf.__version__))
print("GPU Enabled: {}".format(tf.test.is_gpu_available()))

TensorFlow Version 1.15.2
GPU Enabled: True


In [0]:
def get_vocab(vocab_path):
  """Build a token -> index mapping from a one-token-per-line vocabulary file.

  The index of a token is its zero-based line number. Trailing whitespace
  (including the newline) is stripped from every line. If a token occurs on
  several lines, the last occurrence wins.

  Args:
    vocab_path: path of the vocabulary text file.

  Returns:
    dict mapping each token (str) to its line index (int).
  """
  # Decode explicitly as UTF-8 instead of relying on the platform default
  # encoding, so non-ASCII tokens map to the same strings on every machine.
  with open(vocab_path, encoding='utf-8') as f:
    return {line.rstrip(): i for i, line in enumerate(f)}

In [0]:
def data_generator(f_path, params):
  """Yield (words, (intent, slots)) examples from a tab-separated annotation file.

  Each non-empty line is expected to be `<text>\\t<labels>`:
    * `<text>` is whitespace-tokenised and its first and last tokens are
      dropped (presumably BOS/EOS sentence markers -- confirm against the data).
    * `<labels>` is whitespace-tokenised; the last token is the intent, the
      first token is dropped, and the tokens in between are the slot tags.

  Args:
    f_path: path of the data file to read.
    params: unused; kept so the signature stays compatible with callers.

  Yields:
    (words, (intent, slots)) where `words` and `slots` are equally long
    lists of str and `intent` is a str.

  Raises:
    ValueError: if a non-empty line does not contain exactly one tab.
    AssertionError: if the number of words and slot tags differ.
  """
  print('Reading', f_path)
  # Explicit UTF-8 so decoding does not depend on the platform default.
  with open(f_path, encoding='utf-8') as f:
    for line in f:
      line = line.rstrip()
      if not line:
        # A blank (e.g. trailing) line carries no example; skip it instead
        # of crashing on the tuple unpacking below.
        continue
      text, slot_intent = line.split('\t')
      words = text.split()[1:-1]
      slot_intent = slot_intent.split()
      slots, intent = slot_intent[1:-1], slot_intent[-1]
      assert len(words) == len(slots), (
        'words/slots length mismatch in {}: {!r}'.format(f_path, line))

      yield (words, (intent, slots))

In [0]:
def dataset(is_training, params):
  """Build the tf.data input pipeline on top of `data_generator`.

  Training: reads params['train_path'], shuffles with a buffer of
  params['num_samples'] and pads/batches to params['batch_size'].
  Otherwise: reads params['test_path'] and emits padded batches of size 1.
  Both variants finish with an auto-tuned prefetch.

  Args:
    is_training: truthy to build the training pipeline.
    params: configuration dict (see the keys named above).

  Returns:
    A tf.data.Dataset of (words, (intent, slots)) string tensors.
  """
  example_shapes = ([None], ((), [None]))
  example_types = (tf.string, (tf.string, tf.string))
  # Padding symbols for (words, (intent, slots)).
  pad_values = ('<pad>', ('_', 'O'))

  path_key = 'train_path' if is_training else 'test_path'
  pipeline = tf.data.Dataset.from_generator(
    # The path is looked up when the generator is actually started.
    lambda: data_generator(params[path_key], params),
    output_shapes=example_shapes,
    output_types=example_types)

  if is_training:
    pipeline = pipeline.shuffle(params['num_samples'])
    examples_per_batch = params['batch_size']
  else:
    examples_per_batch = 1

  pipeline = pipeline.padded_batch(examples_per_batch, example_shapes, pad_values)
  return pipeline.prefetch(tf.data.experimental.AUTOTUNE)

In [0]:
def clr(step,
        initial_learning_rate,
        maximal_learning_rate,
        step_size,
        scale_fn,
        scale_mode,):
  """Cyclical learning-rate schedule.

  Computes
    cycle = floor(1 + step / (2 * step_size))
    x     = |step / step_size - 2 * cycle + 1|
    lr    = initial + (maximal - initial) * max(0, 1 - x) * scale_fn(s)
  where `s` is `cycle` when scale_mode == 'cycle' and `step` otherwise.

  Args:
    step: current training step (cast to float32).
    initial_learning_rate: lower bound of the schedule; its tensor dtype is
      used for the other quantities.
    maximal_learning_rate: upper bound of the schedule.
    step_size: number of steps in half a cycle.
    scale_fn: callable applied to the cycle index or the step.
    scale_mode: 'cycle' to scale per cycle; any other value scales per step.

  Returns:
    A scalar tensor holding the learning rate for `step`.
  """
  step = tf.cast(step, tf.float32)

  base_lr = tf.convert_to_tensor(
    initial_learning_rate, name='initial_learning_rate')
  lr_dtype = base_lr.dtype
  peak_lr = tf.cast(maximal_learning_rate, lr_dtype)
  half_cycle = tf.cast(step_size, lr_dtype)

  # Index of the current cycle (starting at 1) and distance from its peak.
  cycle = tf.floor(1 + step / (2 * half_cycle))
  x = tf.abs(step / half_cycle - 2 * cycle + 1)

  if scale_mode == 'cycle':
    mode_step = cycle
  else:
    mode_step = step

  lr_range = peak_lr - base_lr
  triangle = tf.maximum(tf.cast(0, lr_dtype), (1 - x))
  return base_lr + lr_range * triangle * scale_fn(mode_step)

In [0]:
def model_fn(features, labels, mode, params):
  """Estimator model_fn: joint intent classification and slot tagging.

  Word-embedding and ELMo features are concatenated, projected, and fed to a
  bidirectional GRU. Slot logits are produced per time step; intent logits
  are produced from the max-pooled GRU outputs plus both final states.

  Args:
    features: string tensor of tokens (assumed (batch, time), as produced by
      `dataset` -- TODO confirm for other callers).
    labels: `(intent, slots)` string tensors, or None (no loss is built then).
    mode: a tf.estimator.ModeKeys value.
    params: configuration dict; reads 'word_path', 'vocab_path',
      'intent_path', 'slot_path', 'dropout_rate', 'rnn_units', 'intent_size',
      'slot_size', 'intent2idx', 'slot2idx', 'clip_norm', 'num_samples' and
      'batch_size'.

  Returns:
    A tf.estimator.EstimatorSpec for TRAIN, EVAL or PREDICT.
  """
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  
  # Map tokens to ids; unknown tokens fall into one extra OOV bucket.
  vocab = tf.contrib.lookup.index_table_from_file(
    params['word_path'], num_oov_buckets=1)
  words = vocab.lookup(features)
  # Sequence length = number of non-zero ids per row.
  # NOTE(review): assumes the padding token '<pad>' is id 0 in the word file -- confirm.
  seq_len = tf.count_nonzero(words, 1, dtype=tf.int32)
  
  # Embedding matrix loaded from the .npy file and wrapped in a tf.Variable.
  embedding = np.load(params['vocab_path'])
  embedding = tf.Variable(embedding, name='embedding', dtype=tf.float32)
  x = tf.nn.embedding_lookup(embedding, words)
  
  # ELMo from TF-Hub (created with trainable=False); the 'lstm_outputs1' output is used.
  elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=False)
  e = elmo(inputs={'tokens':features, 'sequence_len':seq_len}, signature='tokens', as_dict=True)['lstm_outputs1']
  
  # Concatenate both representations on the last axis and project to rnn_units.
  x = tf.concat((x, e), -1)
  x = tf.layers.dropout(x, params['dropout_rate'], training=is_training)
  x = tf.layers.dense(x, params['rnn_units'], tf.nn.elu)
  x = tf.layers.dropout(x, params['dropout_rate'], training=is_training)
  
  # Bidirectional GRU encoder; forward/backward outputs are concatenated.
  cell_fw = tf.nn.rnn_cell.GRUCell(params['rnn_units'], kernel_initializer=tf.orthogonal_initializer())
  cell_bw = tf.nn.rnn_cell.GRUCell(params['rnn_units'], kernel_initializer=tf.orthogonal_initializer())
  o, (s_fw, s_bw) = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, x, seq_len, dtype=tf.float32)
  x = tf.concat(o, -1)
  
  # Intent head: max over axis 1 of the encoder outputs + both final states.
  y_intent = tf.concat((tf.reduce_max(x, 1), s_fw, s_bw), -1)
  y_intent = tf.layers.dropout(y_intent, params['dropout_rate'], training=is_training)
  y_intent = tf.layers.dense(y_intent, params['rnn_units'], tf.nn.elu)
  y_intent = tf.layers.dense(y_intent, params['intent_size'])
  # Slot head: one logit vector per position of the encoder output.
  y_slots = tf.layers.dense(x, params['slot_size'])
  
  
  # The loss is only built when labels are supplied (TRAIN / EVAL).
  if labels is not None:
    intent, slots = labels
    
    # Label strings -> ids, each table with one extra OOV bucket.
    vocab = tf.contrib.lookup.index_table_from_file(
      params['intent_path'], num_oov_buckets=1)
    intent = vocab.lookup(intent)
    vocab = tf.contrib.lookup.index_table_from_file(
      params['slot_path'], num_oov_buckets=1)
    slots = vocab.lookup(slots)
    
    # One-hot depth is vocabulary size + 1, the same formula the setup cell
    # uses for params['intent_size'].
    loss_intent = tf.losses.softmax_cross_entropy(
      onehot_labels = tf.one_hot(intent, len(params['intent2idx'])+1),
      logits = y_intent,
      label_smoothing = .2)
    
    # Per-token weights: 1 where the slot id is non-zero, 1e-2 where it is 0.
    # NOTE(review): `dataset` pads slots with 'O', so this presumably
    # down-weights both padding and 'O' tokens -- assumes 'O' is id 0; confirm.
    weights = tf.cast(tf.sign(slots), tf.float32)
    padding = tf.fill(tf.shape(weights), 1e-2)
    weights = tf.where(tf.equal(weights, 0.), padding, weights)
    loss_slots = tf.compat.v1.losses.softmax_cross_entropy(
      onehot_labels = tf.one_hot(slots, len(params['slot2idx'])+1),
      logits = y_slots,
      weights = tf.cast(weights, tf.float32),
      label_smoothing = .2)
    
    loss_op = loss_intent + loss_slots

  
  if mode == tf.estimator.ModeKeys.TRAIN:
    variables = tf.trainable_variables()
    
    # Clip gradients by global norm before applying them.
    grads = tf.gradients(loss_op, variables)
    grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
    
    # Cyclical learning rate between 1e-4 and 8e-4; the scale_fn halves the
    # amplitude on every new cycle.
    global_step=tf.train.get_or_create_global_step()
    decay_lr = clr(
      step = global_step,
      initial_learning_rate = 1e-4,
      maximal_learning_rate = 8e-4,
      step_size = 8 * params['num_samples'] // params['batch_size'],
      scale_fn=lambda x: 1 / (2.0 ** (x - 1)),
      scale_mode = 'cycle',)
    # Log the current learning rate every 100 iterations.
    hook = tf.train.LoggingTensorHook({'lr': decay_lr}, every_n_iter=100)
    
    optim = tf.train.AdamOptimizer(decay_lr)
    train_op = optim.apply_gradients(
      zip(grads, variables), global_step=global_step)
    
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss_op,
                                      train_op=train_op,
                                      training_hooks=[hook],)
  
  
  # EVAL reports the loss only (no eval metrics are registered here).
  if mode == tf.estimator.ModeKeys.EVAL:
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss_op)
  
  
  # PREDICT returns the arg-max label ids of both heads.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode,
      predictions={'intent': tf.argmax(y_intent, -1),
                   'slots': tf.argmax(y_slots, -1)})

In [0]:
# Run configuration; all paths are relative to the current working directory.
params = dict(
  # Output locations.
  model_dir='../model/elmo_bigru',
  log_path='../log/elmo_bigru.txt',
  # Data files.
  train_path='../data/atis.train.w-intent.iob',
  test_path='../data/atis.test.w-intent.iob',
  # Vocabulary files and the embedding matrix.
  word_path='../vocab/word.txt',
  vocab_path='../vocab/word.npy',
  intent_path='../vocab/intent.txt',
  slot_path='../vocab/slot.txt',
  # Training hyper-parameters.
  batch_size=16,
  num_samples=4978,
  rnn_units=300,
  dropout_rate=.2,
  clip_norm=.1,
)

In [0]:
# Load the three vocabularies (token -> line index) into the config.
params.update(
  word2idx=get_vocab(params['word_path']),
  intent2idx=get_vocab(params['intent_path']),
  slot2idx=get_vocab(params['slot_path']),
)

# Each size is the vocabulary size plus one extra id.
params.update(
  word_size=len(params['word2idx']) + 1,
  intent_size=len(params['intent2idx']) + 1,
  slot_size=len(params['slot2idx']) + 1,
)

In [11]:
# Create the log/model directories if they do not exist yet
Path(os.path.dirname(params['log_path'])).mkdir(exist_ok=True, parents=True)
Path(params['model_dir']).mkdir(exist_ok=True, parents=True)

# Logging: also write TensorFlow's log records to params['log_path'].
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)
# Reuse a handler that already targets this file, so re-running the cell does
# not stack FileHandlers and write every record several times.
_log_file = os.path.abspath(params['log_path'])
fh = next((h for h in logger.handlers
           if isinstance(h, logging.FileHandler) and h.baseFilename == _log_file),
          None)
if fh is None:
  fh = logging.FileHandler(params['log_path'])
  logger.addHandler(fh)
logger.propagate = False

# Create an estimator; checkpoint once per pass over the training data
config = tf.estimator.RunConfig(
  save_checkpoints_steps = params['num_samples']//params['batch_size'] + 1,
  keep_checkpoint_max=3)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  model_dir=params['model_dir'],
  config=config,
  params=params)

slot_best_f1 = .0
intent_acc_with_that = .0
# Eager mode is needed to iterate the test dataset directly in Python below.
tf.enable_eager_execution()

# Gold labels of the test set never change between epochs, so read and encode
# them once instead of on every iteration. Labels missing from a vocabulary
# map to the extra id len(vocab).
intent = []
slots = []
for w, (i, s) in dataset(is_training=False, params=params):
  intent.extend(i.numpy())
  slots.extend(tag for sentence in s.numpy() for tag in sentence)
intent = [params['intent2idx'].get(str(t, 'utf-8'), len(params['intent2idx'])) for t in intent]
slots = [params['slot2idx'].get(str(t, 'utf-8'), len(params['slot2idx'])) for t in slots]
# Tokens whose gold slot id is 0 get zero weight in the slot metrics.
slot_weights = np.sign(slots)

for n_epoch in range(1, 64+1):
  # The training dataset is not repeated, so each call is one pass over it.
  estimator.train(input_fn=lambda: dataset(is_training=True, params=params))

  # Test batches have size 1, so predictions carry no padding and line up
  # token-for-token with the flattened gold slots.
  predicted = list(estimator.predict(input_fn=lambda: dataset(is_training=False, params=params)))
  y_slots = [j for i in predicted for j in i['slots']]
  y_intent = [i['intent'] for i in predicted]
  
  f1_slots = f1_score(y_true = slots,
                      y_pred = y_slots,
                      labels = list(params['slot2idx'].values()),
                      sample_weight = slot_weights,
                      average='micro',)
  
  acc_intent = accuracy_score(intent, y_intent)

  logger.info("Slot F1: {:.3f}, Intent Acc: {:.3f}".format(f1_slots, acc_intent))

  # Detailed per-class reports every 8th epoch.
  if n_epoch % 8 == 0:
    logger.info('\n'+classification_report(y_true = intent,
                                          y_pred = y_intent,
                                          labels = list(params['intent2idx'].values()),
                                          target_names = list(params['intent2idx'].keys()),
                                          digits=3))
    logger.info('\n'+classification_report(y_true = slots,
                                          y_pred = y_slots,
                                          labels = list(params['slot2idx'].values()),
                                          target_names = list(params['slot2idx'].keys()),
                                          sample_weight = slot_weights,
                                          digits=3))
  
  # Track the best slot F1 and the intent accuracy from that same epoch.
  if f1_slots > slot_best_f1:
    slot_best_f1 = f1_slots
    intent_acc_with_that = acc_intent
    # you can save model here
  logger.info("Best Slot F1: {:.3f}, Intent Acc: {:.3f}".format(slot_best_f1, intent_acc_with_that))

INFO:tensorflow:Using config: {'_model_dir': '../model/elmo_bigru', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 312, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 3, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7effbb046390>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automa

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../model/elmo_bigru/model.ckpt-2496
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 2496 into ../model/elmo_bigru/model.ckpt.
Reading ../data/atis.train.w-intent.iob
INFO:tensorflow:loss = 1.5339439, step = 2496
INFO:tensorflow:lr = 0.00079803134
INFO:tensorflow:global_step/sec: 8.61191
INFO:tensorflow:loss = 1.4880711, step = 2596 (11.614 sec)
INFO:tensorflow:lr = 0.0007699076 (11.614 sec)
INFO:tensorflow:global_step/sec: 10.2188
INFO:tensorflow:loss = 1.4614279, step = 2696 (9.788 sec)
INFO:tensorflow:lr = 0.00074178376 (9.787 sec)
INFO:tensorflow:global_step/sec: 9.64632
INFO:tensorflow:loss = 1.6068125, step = 2796 (10.367 sec)
INFO:tensorflow:lr = 0.00071366003 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../model/elmo_bigru/model.ckpt-4992
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 4992 into ../model/elmo_bigru/model.ckpt.
Reading ../data/atis.train.w-intent.iob
INFO:tensorflow:loss = 1.3538101, step = 4992
INFO:tensorflow:lr = 0.00010196867
INFO:tensorflow:global_step/sec: 9.37402
INFO:tensorflow:loss = 1.3983464, step = 5092 (10.672 sec)
INFO:tensorflow:lr = 0.00011603057 (10.674 sec)
INFO:tensorflow:global_step/sec: 9.80098
INFO:tensorflow:loss = 1.5250366, step = 5192 (10.203 sec)
INFO:tensorflow:lr = 0.00013009239 (10.202 sec)
INFO:tensorflow:global_step/sec: 9.83411
INFO:tensorflow:loss = 1.4319367, step = 5292 (10.169 sec)
INFO:tensorflow:lr = 0.000144154

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../model/elmo_bigru/model.ckpt-7488
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 7488 into ../model/elmo_bigru/model.ckpt.
Reading ../data/atis.train.w-intent.iob
INFO:tensorflow:loss = 1.4039026, step = 7488
INFO:tensorflow:lr = 0.000447047
INFO:tensorflow:global_step/sec: 8.82955
INFO:tensorflow:loss = 1.2839608, step = 7588 (11.327 sec)
INFO:tensorflow:lr = 0.00043298508 (11.327 sec)
INFO:tensorflow:global_step/sec: 10.6101
INFO:tensorflow:loss = 1.3488975, step = 7688 (9.427 sec)
INFO:tensorflow:lr = 0.00041892327 (9.428 sec)
INFO:tensorflow:global_step/sec: 9.73681
INFO:tensorflow:loss = 1.4587625, step = 7788 (10.278 sec)
INFO:tensorflow:lr = 0.00040486135 (

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../model/elmo_bigru/model.ckpt-9984
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 9984 into ../model/elmo_bigru/model.ckpt.
Reading ../data/atis.train.w-intent.iob
INFO:tensorflow:loss = 1.4513614, step = 9984
INFO:tensorflow:lr = 0.00010196867
INFO:tensorflow:global_step/sec: 8.97816
INFO:tensorflow:loss = 1.389141, step = 10084 (11.142 sec)
INFO:tensorflow:lr = 0.00010899962 (11.142 sec)
INFO:tensorflow:global_step/sec: 9.92417
INFO:tensorflow:loss = 1.4163504, step = 10184 (10.077 sec)
INFO:tensorflow:lr = 0.00011603057 (10.076 sec)
INFO:tensorflow:global_step/sec: 9.98561
INFO:tensorflow:loss = 1.4134475, step = 10284 (10.013 sec)
INFO:tensorflow:lr = 0.0001230

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../model/elmo_bigru/model.ckpt-12480
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 12480 into ../model/elmo_bigru/model.ckpt.
Reading ../data/atis.train.w-intent.iob
INFO:tensorflow:loss = 1.4291271, step = 12480
INFO:tensorflow:lr = 0.00027253915
INFO:tensorflow:global_step/sec: 9.00107
INFO:tensorflow:loss = 1.4653343, step = 12580 (11.114 sec)
INFO:tensorflow:lr = 0.0002655082 (11.115 sec)
INFO:tensorflow:global_step/sec: 9.30082
INFO:tensorflow:loss = 1.2840335, step = 12680 (10.752 sec)
INFO:tensorflow:lr = 0.00025847726 (10.753 sec)
INFO:tensorflow:global_step/sec: 9.9105
INFO:tensorflow:loss = 1.403368, step = 12780 (10.090 sec)
INFO:tensorflow:lr = 0.000251

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../model/elmo_bigru/model.ckpt-14976
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 14976 into ../model/elmo_bigru/model.ckpt.
Reading ../data/atis.train.w-intent.iob
INFO:tensorflow:loss = 1.3492382, step = 14976
INFO:tensorflow:lr = 0.0001014765
INFO:tensorflow:global_step/sec: 9.11526
INFO:tensorflow:loss = 1.3974228, step = 15076 (10.975 sec)
INFO:tensorflow:lr = 0.000104991974 (10.977 sec)
INFO:tensorflow:global_step/sec: 9.85589
INFO:tensorflow:loss = 1.5027564, step = 15176 (10.146 sec)
INFO:tensorflow:lr = 0.000108507455 (10.145 sec)
INFO:tensorflow:global_step/sec: 9.87238
INFO:tensorflow:loss = 1.3273671, step = 15276 (10.127 sec)
INFO:tensorflow:lr = 0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../model/elmo_bigru/model.ckpt-17472
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 17472 into ../model/elmo_bigru/model.ckpt.
Reading ../data/atis.train.w-intent.iob
INFO:tensorflow:loss = 1.3522878, step = 17472
INFO:tensorflow:lr = 0.0001857774
INFO:tensorflow:global_step/sec: 9.19105
INFO:tensorflow:loss = 1.3775727, step = 17572 (10.882 sec)
INFO:tensorflow:lr = 0.00018226192 (10.880 sec)
INFO:tensorflow:global_step/sec: 10.0444
INFO:tensorflow:loss = 1.3149229, step = 17672 (9.956 sec)
INFO:tensorflow:lr = 0.00017874646 (9.958 sec)
INFO:tensorflow:global_step/sec: 10.0279
INFO:tensorflow:loss = 1.6317581, step = 17772 (9.974 sec)
INFO:tensorflow:lr = 0.0001752

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
