In [1]:
"""
The following lines are needed because this notebook runs on Google Colab.
If you are running the notebook on a local computer, you don't need this cell.
"""
from google.colab import drive
# Mount Google Drive inside the Colab runtime.
drive.mount('/content/gdrive')
import os
# Work from the project's `main` directory: the data, vocab and model paths in
# `params` are written relative to the current working directory.
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/semantic_parsing/tree_slu/main')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
!pip install tensorflow-gpu==2.0.0-alpha0



In [3]:
import tensorflow as tf
import numpy as np
import pprint
import logging
import time
import nltk
import os

from pathlib import Path

# Report the TensorFlow version in use and whether TensorFlow can see a GPU.
print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())

TensorFlow Version 2.0.0-alpha0
GPU Enabled: True


In [0]:
# stream data from text files
def data_generator(f_path, params):
  """Yield one (source, target_in, target_out) example per non-blank line.

  Every non-blank line of the file must contain exactly three tab-separated
  fields: the raw text (ignored), the tokenized text and the bracketed label.
  Both the tokenized text and the label are lower-cased and looked up in the
  single vocabulary `params['tgt2idx']`; tokens missing from it are mapped to
  the id `len(params['tgt2idx'])`.

  Args:
    f_path: path of the tab-separated file to read (decoded as UTF-8).
    params: dict holding at least 'tgt2idx', a token -> integer id mapping.

  Yields:
    A tuple of three lists of ints:
      source: ids of the tokenized utterance.
      target_in: label ids prefixed with id 1 (the id decoding starts from).
      target_out: label ids followed by id 2 (the id decoding stops on).

  Raises:
    ValueError: if a non-blank line does not have exactly three fields.
  """
  vocab = params['tgt2idx']
  # Id used for every out-of-vocabulary token.
  unk_id = len(vocab)
  # Explicit encoding so the result does not depend on the platform default.
  with open(f_path, encoding='utf-8') as f:
    print('Reading', f_path)
    for line in f:
      # Blank lines (e.g. a trailing newline at end of file) carry no example
      # and would otherwise break the three-way unpacking below.
      if not line.strip():
        continue
      _, text_tokenized, label = line.split('\t')
      text_tokenized = text_tokenized.lower().split()
      # Detach each opening bracket from the label that follows it so that
      # '[' becomes a token of its own.
      label = label.replace('[', '[ ').lower().split()
      source = [vocab.get(w, unk_id) for w in text_tokenized]
      target = [vocab.get(w, unk_id) for w in label]
      target_in = [1] + target
      target_out = target + [2]
      yield (source, target_in, target_out)

In [0]:
def dataset(is_training, params):
  """Build the padded, batched tf.data pipeline for training or evaluation.

  Args:
    is_training: when true, read `params['train_path']`, shuffle with a buffer
      of `params['train_samples']` and batch by `params['train_batch_size']`;
      otherwise read `params['test_path']` unshuffled and batch by
      `params['eval_batch_size']`.
    params: configuration dict, also forwarded to `data_generator`.

  Returns:
    A tf.data.Dataset of (source, target_in, target_out) int32 batches, each
    padded with 0 to the longest sequence in the batch.
  """
  shapes = ([None], [None], [None])
  dtypes = (tf.int32, tf.int32, tf.int32)
  pad_values = (0, 0, 0)

  if is_training:
    path_key, batch_key = 'train_path', 'train_batch_size'
  else:
    path_key, batch_key = 'test_path', 'eval_batch_size'

  ds = tf.data.Dataset.from_generator(
    lambda: data_generator(params[path_key], params),
    output_shapes=shapes,
    output_types=dtypes)
  # Only the training stream is shuffled; evaluation keeps file order.
  if is_training:
    ds = ds.shuffle(params['train_samples'])
  ds = ds.padded_batch(params[batch_key], shapes, pad_values)
  return ds.prefetch(tf.data.experimental.AUTOTUNE)

In [0]:
def get_timing_signal_1d(length,
                         channels,
                         min_timescale=1.0,
                         max_timescale=1.0e4,
                         start_index=0):
  """Build a sinusoidal position signal of shape [1, length, channels].

  The first `channels // 2` channels hold sines and the next `channels // 2`
  hold cosines of the position scaled by geometrically spaced timescales
  between `min_timescale` and `max_timescale`; when `channels` is odd the
  last channel is zero padding.

  Args:
    length: number of positions (may be a scalar tensor).
    channels: size of the last dimension of the returned signal.
    min_timescale: smallest timescale of the geometric progression.
    max_timescale: largest timescale of the geometric progression.
    start_index: position assigned to the first timestep.

  Returns:
    A float32 tensor of shape [1, length, channels].
  """
  to_float = lambda x: tf.cast(x, tf.float32)
  position = to_float(tf.range(length) + start_index)
  num_timescales = channels // 2
  log_timescale_increment = (
      tf.math.log(float(max_timescale) / float(min_timescale)) /
      tf.maximum(to_float(num_timescales) - 1, 1))
  inv_timescales = min_timescale * tf.exp(
      to_float(tf.range(num_timescales)) * -log_timescale_increment)
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
  # `tf.mod` is a deprecated TF1 alias that is not part of the TF 2.x
  # namespace; `tf.math.floormod` is the same op under its supported name.
  signal = tf.pad(signal, [[0, 0], [0, tf.math.floormod(channels, 2)]])
  signal = tf.reshape(signal, [1, length, channels])
  return signal

In [0]:
class LayerNorm(tf.keras.layers.Layer):
  """Layer normalization over the last axis with a learned scale and bias.

  The size of the learned vectors comes from `params['global_units']` and the
  numerical-stability constant from `params['epsilon']`.
  """

  def __init__(self, params):
    super().__init__()
    self._epsilon = params['epsilon']
    self._hidden_units = params['global_units']

  def build(self, input_shape):
    # Both parameters are vectors of length `global_units`.
    param_shape = [self._hidden_units]
    self.scale = self.add_weight(
        name='scale',
        shape=param_shape,
        initializer=tf.ones_initializer(),
        trainable=True)
    self.bias = self.add_weight(
        name='bias',
        shape=param_shape,
        initializer=tf.zeros_initializer(),
        trainable=True)
    super().build(input_shape)

  def call(self, inputs):
    # Statistics are taken per position, over the feature axis only.
    mean, variance = tf.nn.moments(inputs, [-1], keepdims=True)
    inv_std = tf.math.rsqrt(variance + self._epsilon)
    normalized = (inputs - mean) * inv_std
    return normalized * self.scale + self.bias

  def compute_output_shape(self, input_shape):
    # Normalization never changes the shape.
    return input_shape

In [0]:
class PointwiseFFNBlock(tf.keras.Model):
  """Pre-norm feed-forward sub-layer: LayerNorm -> FFN -> dropout -> residual.

  The FFN is a ReLU Dense layer of width `multiplier * global_units`, dropout,
  then a linear Dense layer back to `global_units`.
  """

  def __init__(self, params):
    super().__init__()
    units = params['global_units']
    rate = params['dropout_rate']

    self.layer_norm = LayerNorm(params)
    self.block_dropout = tf.keras.layers.Dropout(rate)

    self.filter = tf.keras.layers.Dense(params['multiplier']*units, tf.nn.relu, name='filter')
    self.dropout = tf.keras.layers.Dropout(rate)
    self.linear = tf.keras.layers.Dense(units, name='linear')

  def call(self, inputs, training=False):
    """Apply the block to `inputs` and add the residual connection."""
    normed = self.layer_norm(inputs)
    transformed = self.forward(normed, training=training)
    transformed = self.block_dropout(transformed, training=training)
    return transformed + inputs

  def forward(self, x, training):
    """Run the two-layer feed-forward network (no norm, no residual)."""
    hidden = self.filter(x)
    hidden = self.dropout(hidden, training=training)
    return self.linear(hidden)

In [0]:
class SelfAttentionBlock(tf.keras.Model):
  """Pre-norm multi-head self-attention sub-layer with a residual connection.

  Args:
    params: configuration dict; reads 'global_units', 'dropout_rate',
      'num_heads' and (through LayerNorm) 'epsilon'.
    is_bidirectional: when False, every position may only attend to itself
      and to earlier positions (lower-triangular mask).
  """

  def __init__(self, params, is_bidirectional):
    super().__init__()
    
    self.layer_norm = LayerNorm(params)
    self.block_dropout = tf.keras.layers.Dropout(params['dropout_rate'])
    
    # A single projection produces queries, keys and values at once.
    self.qkv_linear = tf.keras.layers.Dense(3 * params['global_units'], name='qkv_linear')
    self.dropout = tf.keras.layers.Dropout(params['dropout_rate'])
    self.out_linear = tf.keras.layers.Dense(params['global_units'], name='out_linear')
    
    self._is_bidirectional = is_bidirectional
    self._num_heads = params['num_heads']
  
  
  def call(self, inputs, training=False):
    """Apply LayerNorm -> attention -> dropout and add the residual.

    Args:
      inputs: tuple `(x, masks)`; `masks` is either None or a tensor that is
        0 at the positions to ignore.
      training: enables dropout when true.

    Returns:
      Tensor with the same shape as `x`.
    """
    inputs, masks = inputs
    x = self.layer_norm(inputs)
    x = self.forward((x, masks), training=training)
    x = self.block_dropout(x, training=training)
    x += inputs
    return x
    
  
  def forward(self, inputs, training):
    """Scaled dot-product self-attention without norm or residual."""
    x, masks = inputs
    timesteps = tf.shape(x)[1]
    
    q_k_v = self.qkv_linear(x)
    q, k, v = tf.split(q_k_v, 3, axis=-1)
    
    # Heads are stacked along the batch axis: [batch, T, D] -> [heads*batch, T, D/heads].
    if self._num_heads > 1:
      q = tf.concat(tf.split(q, self._num_heads, axis=2), axis=0)                        
      k = tf.concat(tf.split(k, self._num_heads, axis=2), axis=0)                        
      v = tf.concat(tf.split(v, self._num_heads, axis=2), axis=0)
    
    align = tf.matmul(q, k, transpose_b=True)
    align *= tf.math.rsqrt(tf.cast(k.shape[-1], tf.float32))
    
    if (masks is not None) or (not self._is_bidirectional):
      paddings = tf.fill(tf.shape(align), float('-inf'))
    
    if masks is not None:
      # Key mask: tile with the layer's own head count (not a global `params`)
      # so it always matches the head split performed above.
      c_masks = tf.tile(masks, [self._num_heads, 1])
      c_masks = tf.tile(tf.expand_dims(c_masks, 1), [1, timesteps, 1])
      align = tf.where(tf.equal(c_masks, 0), paddings, align)
    
    if not self._is_bidirectional:
      # Causal mask: keep the lower triangle, push the rest to -inf.
      lower_tri = tf.ones((timesteps, timesteps))                                       
      lower_tri = tf.linalg.LinearOperatorLowerTriangular(lower_tri).to_dense()      
      t_masks = tf.tile(tf.expand_dims(lower_tri, 0), [tf.shape(align)[0], 1, 1])     
      align = tf.where(tf.equal(t_masks, 0), paddings, align)
    
    align = tf.nn.softmax(align)
    align = self.dropout(align, training=training)
    
    if masks is not None:
      # Query mask: zero the attention rows that belong to masked positions.
      q_masks = tf.tile(masks, [self._num_heads, 1])
      q_masks = tf.tile(tf.expand_dims(q_masks, 2), [1, 1, timesteps])
      align *= tf.cast(q_masks, tf.float32)
    
    x = tf.matmul(align, v)
    # Undo the head stacking: [heads*batch, T, D/heads] -> [batch, T, D].
    if self._num_heads > 1:
      x = tf.concat(tf.split(x, self._num_heads, axis=0), axis=2)
    x = self.out_linear(x)
    
    return x

In [0]:
class MutualAttentionBlock(tf.keras.Model):
  """Pre-norm multi-head attention from a query sequence onto a context.

  Queries are projected from the (normalized) query sequence, keys and values
  from the context; the block ends with dropout and a residual connection.
  """

  def __init__(self, params):
    super().__init__()
    units = params['global_units']
    rate = params['dropout_rate']

    self.layer_norm = LayerNorm(params)
    self.block_dropout = tf.keras.layers.Dropout(rate)

    self.q_linear = tf.keras.layers.Dense(units, name='q_linear')
    self.kv_linear = tf.keras.layers.Dense(2*units, name='kv_linear')
    self.dropout = tf.keras.layers.Dropout(rate)
    self.out_linear = tf.keras.layers.Dense(units, name='out_linear')

    self._num_heads = params['num_heads']

  def call(self, inputs, training=False):
    """Apply the block to `(decoded, mask_dec, encoded, mask_enc)`."""
    residual, mask_dec, encoded, mask_enc = inputs
    normed = self.layer_norm(residual)
    attended = self.forward((normed, mask_dec, encoded, mask_enc), training=training)
    attended = self.block_dropout(attended, training=training)
    return attended + residual

  def forward(self, inputs, training):
    """Scaled dot-product attention of `query` over `context`."""
    query, mask_query, context, mask_context = inputs
    heads = self._num_heads
    multi_head = heads > 1
    time_query = tf.shape(query)[1]
    time_context = tf.shape(context)[1]

    q = self.q_linear(query)
    k, v = tf.split(self.kv_linear(context), 2, axis=-1)

    # Stack the heads along the batch axis.
    if multi_head:
      q, k, v = [tf.concat(tf.split(t, heads, axis=2), axis=0) for t in (q, k, v)]

    scores = tf.matmul(q, k, transpose_b=True)
    scores *= tf.math.rsqrt(tf.cast(k.shape[-1], tf.float32))

    # Context positions whose mask is 0 get a score of -inf before softmax.
    neg_inf = tf.fill(tf.shape(scores), float('-inf'))
    key_mask = tf.expand_dims(tf.tile(mask_context, [heads, 1]), 1)
    key_mask = tf.tile(key_mask, [1, time_query, 1])
    scores = tf.where(tf.equal(key_mask, 0), neg_inf, scores)

    weights = tf.nn.softmax(scores)
    weights = self.dropout(weights, training=training)

    # Rows belonging to masked query positions are zeroed out.
    row_mask = tf.expand_dims(tf.tile(mask_query, [heads, 1]), 2)
    row_mask = tf.tile(row_mask, [1, 1, time_context])
    weights *= tf.cast(row_mask, tf.float32)

    attended = tf.matmul(weights, v)
    # Bring the heads back from the batch axis to the feature axis.
    if multi_head:
      attended = tf.concat(tf.split(attended, heads, axis=0), axis=2)
    return self.out_linear(attended)

In [0]:
class EncoderLayer(tf.keras.Model):
  """One encoder layer: bidirectional self-attention, then a feed-forward block."""

  def __init__(self, params, name):
    super().__init__(name=name)
    self.self_attention = SelfAttentionBlock(params, is_bidirectional=True)
    self.pointwise_ffn = PointwiseFFNBlock(params)

  def call(self, inputs, training=False):
    """Transform `(hidden, mask)` and return the new hidden states."""
    hidden, mask = inputs
    attended = self.self_attention((hidden, mask), training=training)
    return self.pointwise_ffn(attended, training=training)

In [0]:
class DecoderLayer(tf.keras.Model):
  """One decoder layer: causal self-attention, attention over the encoder
  output, then a feed-forward block."""

  def __init__(self, params, name):
    super().__init__(name=name)
    self.self_attention = SelfAttentionBlock(params, is_bidirectional=False)
    self.mutual_attention = MutualAttentionBlock(params)
    self.pointwise_ffn = PointwiseFFNBlock(params)

  def call(self, inputs, training=False):
    """Transform `(decoded, mask_dec, encoded, mask_enc)` into new decoder states."""
    hidden, mask_dec, encoded, mask_enc = inputs
    hidden = self.self_attention((hidden, mask_dec), training=training)
    hidden = self.mutual_attention(
        (hidden, mask_dec, encoded, mask_enc), training=training)
    return self.pointwise_ffn(hidden, training=training)

In [0]:
class Encoder(tf.keras.Model):
  """Embeds token ids, adds the timing signal and runs the encoder stack.

  The embedding table has `len(params['tgt2idx'])` rows; id 0 is treated as
  padding because the mask is computed as `tf.sign(ids)`.
  """

  def __init__(self, params):
    super().__init__()
    self.params = params
    vocab_size = len(params['tgt2idx'])
    units = params['global_units']
    self.embedding = tf.keras.layers.Embedding(
        vocab_size, units,
        embeddings_initializer=tf.initializers.RandomNormal(stddev=units ** -0.5))
    self.input_dropout = tf.keras.layers.Dropout(params['dropout_rate'])
    layers = []
    for i in range(params['num_layers']):
      layers.append(EncoderLayer(params, name='enc_layer_{}'.format(i+1)))
    self.encodes = layers

  def call(self, inputs, training=False):
    """Encode a batch of token ids into hidden states."""
    token_ids = inputs
    # 1 for real tokens, 0 for padding.
    mask_enc = tf.sign(token_ids)

    embedded = self.embedding(token_ids)
    encoded = self.input_transform(embedded, training=training)

    for layer in self.encodes:
      encoded = layer((encoded, mask_enc), training=training)
    return encoded

  def input_transform(self, x, training):
    """Optionally scale the embeddings, add the timing signal, apply dropout."""
    units = self.params['global_units']
    if self.params['is_embedding_scaled']:
      x = x * tf.sqrt(tf.cast(units, tf.float32))
    x = x + get_timing_signal_1d(tf.shape(x)[1], units)
    return self.input_dropout(x, training=training)

In [0]:
class Decoder(tf.keras.Model):
  """Decoder stack whose output projection reuses the input embedding.

  Args:
    params: configuration dict; reads 'tgt2idx', 'global_units',
      'dropout_rate', 'num_layers' and 'is_embedding_scaled'.
    tied_embedding: embedding layer used both to embed the decoder input and,
      transposed, to project decoder states onto the vocabulary.
  """

  def __init__(self, params, tied_embedding):
    super().__init__()
    self.params = params
    self.embedding = tied_embedding
    self.input_dropout = tf.keras.layers.Dropout(params['dropout_rate'])
    self.decodes = [DecoderLayer(params, name='dec_layer_{}'.format(i+1)) for i in range(params['num_layers'])]
    # No initializer is passed, so the framework default is used for this bias.
    self.out_bias = self.add_weight(name='out_bias', shape=[len(params['tgt2idx'])])
    
  
  def call(self, inputs, training=False):
    """Compute vocabulary logits for every decoder position.

    Args:
      inputs: tuple `(input_dec, memory, memory_mask)` of decoder input ids,
        encoder output and encoder mask.
      training: enables dropout when true.

    Returns:
      Logits whose last dimension is `len(params['tgt2idx'])`.
    """
    input_dec, memory, memory_mask = inputs
    
    # 1 for real tokens, 0 for padding (id 0).
    mask_dec = tf.sign(input_dec)
    
    input_dec = self.embedding(input_dec)
    
    decoded = self.input_transform(input_dec, training=training)
    
    for layer in self.decodes:
      decoded = layer((decoded, mask_dec, memory, memory_mask), training=training)
    
    logits = self.tied_output(decoded)
    return logits
  
  
  def input_transform(self, x, training):
    """Optionally scale the embeddings, add the timing signal, apply dropout."""
    if self.params['is_embedding_scaled']:
      x *= tf.sqrt(tf.cast(self.params['global_units'], tf.float32))
    x += get_timing_signal_1d(tf.shape(x)[1], self.params['global_units'])
    x = self.input_dropout(x, training=training)
    return x
  
  
  def tied_output(self, decoded):
    """Project decoder states onto the vocabulary with the shared embedding."""
    axis_1, axis_2  = tf.shape(decoded)[0], tf.shape(decoded)[1]
    # Use the configuration this instance was built with rather than the
    # notebook-global `params`, so the reshape always matches the model width.
    decoded = tf.reshape(decoded, (axis_1*axis_2, self.params['global_units']))
    logits = tf.matmul(decoded, self.embedding.embeddings, transpose_b=True)
    logits = tf.reshape(logits, (axis_1, axis_2, len(self.params['tgt2idx'])))
    logits = tf.nn.bias_add(logits, self.out_bias)
    return logits

In [0]:
def get_vocab(f_path):
  """Read a vocabulary file with one token per line.

  Args:
    f_path: path of the vocabulary file (decoded as UTF-8).

  Returns:
    dict mapping each line, with trailing whitespace removed, to its
    zero-based line number. If a token occurs more than once, the last
    occurrence wins.
  """
  # Explicit encoding so the result does not depend on the platform default.
  with open(f_path, encoding='utf-8') as f:
    return {line.rstrip(): i for i, line in enumerate(f)}

In [0]:
def is_descending(history: list, num_patience=None) -> bool:
  """Tell whether the most recent values of `history` are strictly decreasing.

  Only the last `num_patience + 1` entries are inspected. The result is True
  when every inspected value is strictly lower than the one before it, which
  includes the case of fewer than two inspected values.

  Args:
    history: sequence of metric values, oldest first.
    num_patience: number of consecutive decreases to look for; defaults to
      the notebook-level `params['num_patience']` when omitted.

  Returns:
    True if the inspected tail is strictly descending, False otherwise.
  """
  if num_patience is None:
    num_patience = params['num_patience']
  recent = history[-(num_patience+1):]
  # Any step that fails to decrease (equal or higher) breaks the streak.
  return not any(prev <= cur for prev, cur in zip(recent, recent[1:]))

In [0]:
# Central configuration shared by the data pipeline, the model and the training loop.
params = {
    # Files read by dataset() for training and evaluation.
    'train_path': '../data/train.tsv',
    'test_path': '../data/test.tsv',
    # NOTE(review): 'vocab_src_path' is not read anywhere in the visible notebook;
    # source and target tokens are both looked up in the target vocabulary.
    'vocab_src_path': '../vocab/source.txt',
    'vocab_tgt_path': '../vocab/target.txt',
    # Directory the encoder/decoder weights are saved to.
    'model_path': '../model/',
    # Number of EncoderLayer / DecoderLayer instances in each stack.
    'num_layers': 3,
    # Rate used by every Dropout layer in the model.
    'dropout_rate': 0.2,
    # Embedding size and width of every attention/FFN output.
    'global_units': 300,
    # Attention heads; the projections are split into this many equal parts.
    'num_heads': 4,
    # The feed-forward inner layer has multiplier * global_units units.
    'multiplier': 2,
    # Added to the variance inside LayerNorm.
    'epsilon': 1e-6,
    # When True, embeddings are multiplied by sqrt(global_units).
    'is_embedding_scaled': True,
    # Maximum number of greedy decoding steps in minimal_test().
    'max_decode_len': 50,
    # Initial learning rate (also the starting point of the exponential decay).
    'lr': 4e-4,
    # Used as the shuffle buffer size of the training dataset.
    'train_samples': 31279,
    'train_batch_size': 32,
    # Only used to print the evaluation progress denominator.
    'eval_samples': 9042,
    'eval_batch_size': 300,
    # Window (plus one) of accuracies inspected by is_descending() for early stopping.
    'num_patience': 5,
}

In [0]:
# Load the token -> id vocabulary and derive the inverse id -> token mapping
# (used to turn predicted ids back into text).
params['tgt2idx'] = get_vocab(params['vocab_tgt_path'])
params['idx2tgt'] = {idx: tgt for tgt, idx in params['tgt2idx'].items()}

In [19]:
# Make sure the directory for saved weights exists (its parent must already exist).
Path(params['model_path']).mkdir(exist_ok=True)

# Build the encoder on a (batch, time) input so its variables are created and can be listed.
encoder = Encoder(params)
encoder.build((None, None))
pprint.pprint([(v.name, v.shape) for v in encoder.trainable_variables])

# The decoder receives the encoder's embedding layer, so both share one embedding matrix.
decoder = Decoder(params, encoder.embedding)
# Input shapes: decoder ids, encoder output, encoder mask.
decoder.build([[None, None], [None, None, params['global_units']], [None, None]])
pprint.pprint([(v.name, v.shape) for v in decoder.trainable_variables])

[('embedding/embeddings:0', TensorShape([8691, 300])),
 ('enc_layer_1/self_attention_block/layer_norm/scale:0', TensorShape([300])),
 ('enc_layer_1/self_attention_block/layer_norm/bias:0', TensorShape([300])),
 ('enc_layer_1/self_attention_block/qkv_linear/kernel:0',
  TensorShape([300, 900])),
 ('enc_layer_1/self_attention_block/qkv_linear/bias:0', TensorShape([900])),
 ('enc_layer_1/self_attention_block/out_linear/kernel:0',
  TensorShape([300, 300])),
 ('enc_layer_1/self_attention_block/out_linear/bias:0', TensorShape([300])),
 ('enc_layer_1/pointwise_ffn_block/layer_norm_1/scale:0', TensorShape([300])),
 ('enc_layer_1/pointwise_ffn_block/layer_norm_1/bias:0', TensorShape([300])),
 ('enc_layer_1/pointwise_ffn_block/filter/kernel:0', TensorShape([300, 600])),
 ('enc_layer_1/pointwise_ffn_block/filter/bias:0', TensorShape([600])),
 ('enc_layer_1/pointwise_ffn_block/linear/kernel:0', TensorShape([600, 300])),
 ('enc_layer_1/pointwise_ffn_block/linear/bias:0', TensorShape([300])),
 ('en

In [0]:
# Exponential decay schedule: starts at params['lr'], decay_steps=1000, decay_rate=0.96.
decay_lr = tf.optimizers.schedules.ExponentialDecay(params['lr'], 1000, 0.96)
# The optimizer is created with a fixed rate; the training loop overwrites
# `optim.lr` from `decay_lr(global_step)` before every update.
optim = tf.optimizers.Adam(params['lr'])
# Number of optimizer updates performed so far.
global_step = 0

In [0]:
# Exact-match accuracy of every evaluation pass, oldest first (input of is_descending).
history_acc = []
# Best accuracy seen so far; weights are saved whenever it is exceeded.
best_acc = .0

In [0]:
# Reference time for the "Spent" figure in the training log; reset after each log line.
t0 = time.time()
# Reuse TensorFlow's logger and make INFO messages visible.
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

In [0]:
def minimal_test(encoder, decoder, params):
  """Greedy-decode one fixed utterance and print the predicted parse.

  Decoding starts from id 1 and stops as soon as id 2 is produced or after
  `params['max_decode_len']` steps. The predicted tokens are printed and, when
  they form a well-bracketed tree, the tree is pretty-printed with nltk.

  Args:
    encoder: model mapping a batch of token ids to encoder states.
    decoder: model mapping `(generated_ids, memory, memory_mask)` to logits.
    params: dict with 'tgt2idx', 'idx2tgt' and 'max_decode_len'.

  Raises:
    KeyError: if a word of the fixed utterance is missing from 'tgt2idx'.
  """
  test_str = ['what', 'times', 'are', 'the', 'nutcracker', 'show', 'playing', 'near', 'me']
  test_arr = tf.convert_to_tensor([[params['tgt2idx'][w] for w in test_str]])
  generated = tf.convert_to_tensor([[1]])
  memory = encoder(test_arr, training=False)
  memory_mask = tf.sign(test_arr)
  
  for i in range(params['max_decode_len']):
    logits = decoder((generated, memory, memory_mask), training=False)
    ids = tf.argmax(logits[:, i, :], axis=-1, output_type=tf.int32)
    ids = tf.expand_dims(ids, 1)
    generated = tf.concat((generated, ids), axis=1)
    # `ndarray.item()` replaces the deprecated `np.asscalar`.
    if ids.numpy().item() == 2:
      break
  print('-'*12)
  print('minimal test')
  print('utterance:', ' '.join(test_str))
  # Drop the leading start id; drop the last id only when it really is the
  # end id, so a decode cut off at max_decode_len keeps its final token.
  token_ids = generated[0].numpy()[1:]
  if len(token_ids) > 0 and token_ids[-1] == 2:
    token_ids = token_ids[:-1]
  parsed = ' '.join([params['idx2tgt'][idx] for idx in token_ids])
  print('parsed:', parsed)
  print()
  try:
    nltk.tree.Tree.fromstring(parsed.replace('[ ', '(').replace(' ]', ')')).pretty_print()
  except Exception as exc:
    # Rendering is best effort: a malformed prediction must not abort training,
    # but KeyboardInterrupt/SystemExit are no longer swallowed.
    print('(tree rendering skipped: {})'.format(type(exc).__name__))
  print('-'*12)

In [0]:
def is_descending(history: list, num_patience=None) -> bool:
  """Tell whether the most recent values of `history` are strictly decreasing.

  Only the last `num_patience + 1` entries are inspected. The result is True
  when every inspected value is strictly lower than the one before it, which
  includes the case of fewer than two inspected values.

  Args:
    history: sequence of metric values, oldest first.
    num_patience: number of consecutive decreases to look for; defaults to
      the notebook-level `params['num_patience']` when omitted.

  Returns:
    True if the inspected tail is strictly descending, False otherwise.
  """
  if num_patience is None:
    num_patience = params['num_patience']
  recent = history[-(num_patience+1):]
  # Any step that fails to decrease (equal or higher) breaks the streak.
  return not any(prev <= cur for prev, cur in zip(recent, recent[1:]))

In [25]:
# Alternate one pass over the training data with one evaluation pass until
# the early-stopping condition at the bottom of the loop is met.
while True:
  # TRAINING
  is_training = True
  for (source, target_in, target_out) in dataset(is_training=is_training, params=params):
    with tf.GradientTape() as tape:
      memory = encoder(source, training=is_training)
      # tf.sign(source) is the encoder padding mask (0 where the id is 0).
      logits = decoder((target_in, memory, tf.sign(source)), training=is_training)
      loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_out, logits=logits)
      # Average the token losses over non-padding target positions only.
      weights = tf.cast(tf.sign(target_out), tf.float32)
      loss = tf.reduce_sum(loss * weights) / tf.reduce_sum(weights)

    # Apply the decay schedule by hand before each update.
    optim.lr.assign(decay_lr(global_step))
    # NOTE(review): decoder.embedding is the encoder's embedding layer, so the
    # shared matrix may appear twice in this list - confirm this is intended.
    variables = encoder.trainable_variables + decoder.trainable_variables
    grads = tape.gradient(loss, variables)
    optim.apply_gradients(zip(grads, variables))

    # Log every 50 steps; "Spent" is the time since the previous log line.
    if global_step % 50 == 0:
      logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
          global_step, loss.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
      t0 = time.time()

    global_step += 1

  # EVALUATION
  minimal_test(encoder, decoder, params)
  m = tf.keras.metrics.Mean()
  is_training=False

  for i, (source, target_in, target_out) in enumerate(dataset(is_training=is_training, params=params)):
    # Every sequence starts from id 1; the encoder runs once per batch.
    generated = tf.ones((source.shape[0], 1), tf.int32)
    memory = encoder(source, training=is_training)
    memory_mask = tf.sign(source)

    # Greedy decoding for as many steps as the longest reference in the batch;
    # the decoder is re-run on the whole prefix at every step.
    for j in range(target_out.shape[1]):
      logits = decoder((generated, memory, memory_mask), training=is_training)
      ids = tf.argmax(logits[:, j, :], axis=-1, output_type=tf.int32)
      ids = tf.expand_dims(ids, 1)
      generated = tf.concat((generated, ids), axis=1)

    # Index of the first id 2 in each reference.
    seq_lens = tf.argmax(tf.cast(tf.equal(target_out, 2), tf.int32), axis=1)
    for pred, tgt, seq_len in zip(generated.numpy(), target_out.numpy(), seq_lens.numpy()):
      # Drop the leading start id, then compare up to and including the
      # position of the reference's id 2; a sample counts only on an exact match.
      pred = pred[1:][:seq_len+1]
      tgt = tgt[:seq_len+1]
      matched = np.all(pred == tgt)
      m.update_state(int(matched))
    print("Testing [{}/{}]".format(i, params['eval_samples']//params['eval_batch_size']))

  acc = m.result().numpy()
  logger.info("Evaluation: Testing Exact Match Accuracy: {:.3f}".format(acc))
  history_acc.append(acc)

  # Checkpoint only when the accuracy improves on the best value so far.
  if acc > best_acc:
    best_acc = acc
    encoder.save_weights(os.path.join(params['model_path'], 'encoder_{}'.format(global_step)))
    decoder.save_weights(os.path.join(params['model_path'], 'decoder_{}'.format(global_step)))
  logger.info("Best Accuracy: {:.3f}".format(best_acc))

  # NOTE(review): is_descending() is True only when each of the last
  # num_patience+1 accuracies is strictly lower than the one before it, which
  # is stricter than the "not improved" wording of the message - confirm intent.
  if len(history_acc) > params['num_patience'] and is_descending(history_acc):
    logger.info("Testing Accuracy not improved over {} epochs, Early Stop".format(params['num_patience']))
    break

W0509 01:21:11.768664 139936780654464 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/data/ops/dataset_ops.py:410: py_func (from tensorflow.python.ops.script_ops) is deprecated and will be removed in a future version.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, there are two
    options available in V2.
    - tf.py_function takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    - tf.numpy_function maintains the semantics of the deprecated tf.py_func
    (it is not differentiable, and manipulates numpy arrays). It drops the
    stateful argument making all functions stateful.
    


Reading ../data/train.tsv


I0509 01:21:19.049518 139936780654464 interactiveshell.py:2882] Step 0 | Loss: 16.1638 | Spent: 7.4 secs | LR: 0.000400
I0509 01:21:59.613191 139936780654464 interactiveshell.py:2882] Step 50 | Loss: 4.4796 | Spent: 40.6 secs | LR: 0.000399
I0509 01:22:38.784964 139936780654464 interactiveshell.py:2882] Step 100 | Loss: 3.2376 | Spent: 39.2 secs | LR: 0.000398
I0509 01:23:18.624637 139936780654464 interactiveshell.py:2882] Step 150 | Loss: 2.8610 | Spent: 39.8 secs | LR: 0.000398
I0509 01:23:57.507603 139936780654464 interactiveshell.py:2882] Step 200 | Loss: 2.2647 | Spent: 38.9 secs | LR: 0.000397
I0509 01:24:38.523415 139936780654464 interactiveshell.py:2882] Step 250 | Loss: 2.2872 | Spent: 41.0 secs | LR: 0.000396
I0509 01:25:17.143101 139936780654464 interactiveshell.py:2882] Step 300 | Loss: 2.1278 | Spent: 38.6 secs | LR: 0.000395
I0509 01:25:56.598409 139936780654464 interactiveshell.py:2882] Step 350 | Loss: 1.4877 | Spent: 39.5 secs | LR: 0.000394
I0509 01:26:35.385920 13993

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are the nutcracker show playing [ sl:location nutcracker ] [ sl:category_event playing ] [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ] ]

------------
Reading ../data/test.tsv
Testing [0/30]
Testing [1/30]
Testing [2/30]
Testing [3/30]
Testing [4/30]
Testing [5/30]
Testing [6/30]
Testing [7/30]
Testing [8/30]
Testing [9/30]
Testing [10/30]
Testing [11/30]
Testing [12/30]
Testing [13/30]
Testing [14/30]
Testing [15/30]
Testing [16/30]
Testing [17/30]
Testing [18/30]
Testing [19/30]
Testing [20/30]
Testing [21/30]
Testing [22/30]
Testing [23/30]
Testing [24/30]
Testing [25/30]
Testing [26/30]
Testing [27/30]
Testing [28/30]
Testing [29/30]


I0509 01:46:38.935932 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.241


Testing [30/30]


I0509 01:46:39.157994 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.241


Reading ../data/train.tsv


I0509 01:47:01.527430 139936780654464 interactiveshell.py:2882] Step 1000 | Loss: 0.5929 | Spent: 796.0 secs | LR: 0.000384
I0509 01:47:41.349691 139936780654464 interactiveshell.py:2882] Step 1050 | Loss: 0.5607 | Spent: 39.8 secs | LR: 0.000383
I0509 01:48:19.409456 139936780654464 interactiveshell.py:2882] Step 1100 | Loss: 0.7038 | Spent: 38.1 secs | LR: 0.000382
I0509 01:48:58.619060 139936780654464 interactiveshell.py:2882] Step 1150 | Loss: 0.6052 | Spent: 39.2 secs | LR: 0.000382
I0509 01:49:36.589587 139936780654464 interactiveshell.py:2882] Step 1200 | Loss: 0.6520 | Spent: 38.0 secs | LR: 0.000381
I0509 01:50:16.592535 139936780654464 interactiveshell.py:2882] Step 1250 | Loss: 0.9052 | Spent: 40.0 secs | LR: 0.000380
I0509 01:50:55.673408 139936780654464 interactiveshell.py:2882] Step 1300 | Loss: 0.5354 | Spent: 39.1 secs | LR: 0.000379
I0509 01:51:34.846404 139936780654464 interactiveshell.py:2882] Step 1350 | Loss: 0.7518 | Spent: 39.2 secs | LR: 0.000379
I0509 01:52:12.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are the nutcracker show playing ] [ sl:category_event playing ] [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ] ]

------------
Reading ../data/test.tsv
Testing [0/30]
Testing [1/30]
Testing [2/30]
Testing [3/30]
Testing [4/30]
Testing [5/30]
Testing [6/30]
Testing [7/30]
Testing [8/30]
Testing [9/30]
Testing [10/30]
Testing [11/30]
Testing [12/30]
Testing [13/30]
Testing [14/30]
Testing [15/30]
Testing [16/30]
Testing [17/30]
Testing [18/30]
Testing [19/30]
Testing [20/30]
Testing [21/30]
Testing [22/30]
Testing [23/30]
Testing [24/30]
Testing [25/30]
Testing [26/30]
Testing [27/30]
Testing [28/30]
Testing [29/30]


I0509 02:11:53.616971 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.435


Testing [30/30]


I0509 02:11:53.844871 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.435


Reading ../data/train.tsv


I0509 02:12:34.552451 139936780654464 interactiveshell.py:2882] Step 2000 | Loss: 0.4296 | Spent: 794.0 secs | LR: 0.000369
I0509 02:13:14.991754 139936780654464 interactiveshell.py:2882] Step 2050 | Loss: 0.3847 | Spent: 40.4 secs | LR: 0.000368
I0509 02:13:53.531533 139936780654464 interactiveshell.py:2882] Step 2100 | Loss: 0.3993 | Spent: 38.5 secs | LR: 0.000367
I0509 02:14:32.993505 139936780654464 interactiveshell.py:2882] Step 2150 | Loss: 0.5225 | Spent: 39.5 secs | LR: 0.000366
I0509 02:15:11.114218 139936780654464 interactiveshell.py:2882] Step 2200 | Loss: 0.2978 | Spent: 38.1 secs | LR: 0.000366
I0509 02:15:50.272045 139936780654464 interactiveshell.py:2882] Step 2250 | Loss: 0.3991 | Spent: 39.2 secs | LR: 0.000365
I0509 02:16:29.995265 139936780654464 interactiveshell.py:2882] Step 2300 | Loss: 0.4213 | Spent: 39.7 secs | LR: 0.000364
I0509 02:17:09.534957 139936780654464 interactiveshell.py:2882] Step 2350 | Loss: 0.3430 | Spent: 39.5 secs | LR: 0.000363
I0509 02:17:48.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are the nutcracker show [ sl:category_event playing ] [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ]

                   in:get_event                                                                        
  ______________________|_____________________________________________________                          
 |     |    |   |       |        |          |                            sl:location                   
 |     |    |   |       |        |          |                                 |                         
 |     |    |   |       |        |          |                          in:get_location                 
 |     |    |   |       |        |          |                 ________________|_______________          
 |     |    |   |       |        |   sl:category_even sl:search_radius                 sl:location_user
 |  

I0509 02:37:14.898424 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.466


Testing [30/30]


I0509 02:37:15.122094 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.466


Reading ../data/train.tsv


I0509 02:37:32.849627 139936780654464 interactiveshell.py:2882] Step 2950 | Loss: 0.2550 | Spent: 794.4 secs | LR: 0.000355
I0509 02:38:11.821692 139936780654464 interactiveshell.py:2882] Step 3000 | Loss: 0.2395 | Spent: 39.0 secs | LR: 0.000354
I0509 02:38:50.948942 139936780654464 interactiveshell.py:2882] Step 3050 | Loss: 0.3061 | Spent: 39.1 secs | LR: 0.000353
I0509 02:39:29.174814 139936780654464 interactiveshell.py:2882] Step 3100 | Loss: 0.3031 | Spent: 38.2 secs | LR: 0.000352
I0509 02:40:08.333458 139936780654464 interactiveshell.py:2882] Step 3150 | Loss: 0.1831 | Spent: 39.2 secs | LR: 0.000352
I0509 02:40:46.524672 139936780654464 interactiveshell.py:2882] Step 3200 | Loss: 0.2622 | Spent: 38.2 secs | LR: 0.000351
I0509 02:41:25.668602 139936780654464 interactiveshell.py:2882] Step 3250 | Loss: 0.2926 | Spent: 39.1 secs | LR: 0.000350
I0509 02:42:04.751686 139936780654464 interactiveshell.py:2882] Step 3300 | Loss: 0.1669 | Spent: 39.1 secs | LR: 0.000350
I0509 02:42:44.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are [ sl:category_event the nutcracker show ] show me [ sl:category_event playing ] [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ] ]

------------
Reading ../data/test.tsv
Testing [0/30]
Testing [1/30]
Testing [2/30]
Testing [3/30]
Testing [4/30]
Testing [5/30]
Testing [6/30]
Testing [7/30]
Testing [8/30]
Testing [9/30]
Testing [10/30]
Testing [11/30]
Testing [12/30]
Testing [13/30]
Testing [14/30]
Testing [15/30]
Testing [16/30]
Testing [17/30]
Testing [18/30]
Testing [19/30]
Testing [20/30]
Testing [21/30]
Testing [22/30]
Testing [23/30]
Testing [24/30]
Testing [25/30]
Testing [26/30]
Testing [27/30]
Testing [28/30]
Testing [29/30]


I0509 03:02:27.581637 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.544


Testing [30/30]


I0509 03:02:27.826102 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.544


Reading ../data/train.tsv


I0509 03:03:04.281676 139936780654464 interactiveshell.py:2882] Step 3950 | Loss: 0.2583 | Spent: 790.5 secs | LR: 0.000340
I0509 03:03:43.733585 139936780654464 interactiveshell.py:2882] Step 4000 | Loss: 0.1926 | Spent: 39.5 secs | LR: 0.000340
I0509 03:04:22.826245 139936780654464 interactiveshell.py:2882] Step 4050 | Loss: 0.2137 | Spent: 39.1 secs | LR: 0.000339
I0509 03:05:01.046948 139936780654464 interactiveshell.py:2882] Step 4100 | Loss: 0.2296 | Spent: 38.2 secs | LR: 0.000338
I0509 03:05:40.370499 139936780654464 interactiveshell.py:2882] Step 4150 | Loss: 0.2157 | Spent: 39.3 secs | LR: 0.000338
I0509 03:06:18.809179 139936780654464 interactiveshell.py:2882] Step 4200 | Loss: 0.2142 | Spent: 38.4 secs | LR: 0.000337
I0509 03:06:58.231165 139936780654464 interactiveshell.py:2882] Step 4250 | Loss: 0.2383 | Spent: 39.4 secs | LR: 0.000336
I0509 03:07:36.696148 139936780654464 interactiveshell.py:2882] Step 4300 | Loss: 0.3071 | Spent: 38.5 secs | LR: 0.000336
I0509 03:08:17.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are [ sl:category_event the nutcracker show show ] [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ]

                     in:get_event                                                              
  ________________________|___________________________________________                          
 |     |    |             |                                      sl:location                   
 |     |    |             |                                           |                         
 |     |    |             |                                    in:get_location                 
 |     |    |             |                           ________________|_______________          
 |     |    |      sl:category_even           sl:search_radius                 sl:location_user
 |     |    |             t                          |         

I0509 03:27:43.961975 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.564


Testing [30/30]


I0509 03:27:44.193799 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.564


Reading ../data/train.tsv


I0509 03:27:58.759199 139936780654464 interactiveshell.py:2882] Step 4900 | Loss: 0.2359 | Spent: 791.1 secs | LR: 0.000327
I0509 03:28:38.355973 139936780654464 interactiveshell.py:2882] Step 4950 | Loss: 0.1572 | Spent: 39.6 secs | LR: 0.000327
I0509 03:29:20.461044 139936780654464 interactiveshell.py:2882] Step 5000 | Loss: 0.1591 | Spent: 42.1 secs | LR: 0.000326
I0509 03:29:59.274892 139936780654464 interactiveshell.py:2882] Step 5050 | Loss: 0.2545 | Spent: 38.8 secs | LR: 0.000325
I0509 03:30:39.382034 139936780654464 interactiveshell.py:2882] Step 5100 | Loss: 0.2429 | Spent: 40.1 secs | LR: 0.000325
I0509 03:31:18.986181 139936780654464 interactiveshell.py:2882] Step 5150 | Loss: 0.2767 | Spent: 39.6 secs | LR: 0.000324
I0509 03:31:59.606035 139936780654464 interactiveshell.py:2882] Step 5200 | Loss: 0.2777 | Spent: 40.6 secs | LR: 0.000323
I0509 03:32:39.464336 139936780654464 interactiveshell.py:2882] Step 5250 | Loss: 0.1653 | Spent: 39.9 secs | LR: 0.000323
I0509 03:33:20.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are [ sl:category_event the nutcracker show ] [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ]

               in:get_event                                                                        
  __________________|_____________________________________________________                          
 |     |    |                      |                                 sl:location                   
 |     |    |                      |                                      |                         
 |     |    |                      |                               in:get_location                 
 |     |    |                      |                      ________________|_______________          
 |     |    |               sl:category_even      sl:search_radius                 sl:location_user
 |     |    |                      t    

I0509 03:53:28.867632 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.597


Testing [30/30]


I0509 03:53:29.089282 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.597


Reading ../data/train.tsv


I0509 03:53:59.521370 139936780654464 interactiveshell.py:2882] Step 5900 | Loss: 0.1213 | Spent: 793.7 secs | LR: 0.000314
I0509 03:54:40.294264 139936780654464 interactiveshell.py:2882] Step 5950 | Loss: 0.1185 | Spent: 40.8 secs | LR: 0.000314
I0509 03:55:19.268574 139936780654464 interactiveshell.py:2882] Step 6000 | Loss: 0.2134 | Spent: 39.0 secs | LR: 0.000313
I0509 03:55:57.593181 139936780654464 interactiveshell.py:2882] Step 6050 | Loss: 0.1753 | Spent: 38.3 secs | LR: 0.000312
I0509 03:56:37.036359 139936780654464 interactiveshell.py:2882] Step 6100 | Loss: 0.2051 | Spent: 39.4 secs | LR: 0.000312
I0509 03:57:15.234169 139936780654464 interactiveshell.py:2882] Step 6150 | Loss: 0.1612 | Spent: 38.2 secs | LR: 0.000311
I0509 03:57:54.637874 139936780654464 interactiveshell.py:2882] Step 6200 | Loss: 0.1390 | Spent: 39.4 secs | LR: 0.000311
I0509 03:58:32.939370 139936780654464 interactiveshell.py:2882] Step 6250 | Loss: 0.0871 | Spent: 38.3 secs | LR: 0.000310
I0509 03:59:11.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are [ sl:category_event the nutcracker show ] playing [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ]

                       in:get_event                                                                        
  __________________________|_____________________________________________________                          
 |     |    |     |                        |                                 sl:location                   
 |     |    |     |                        |                                      |                         
 |     |    |     |                        |                               in:get_location                 
 |     |    |     |                        |                      ________________|_______________          
 |     |    |     |                 sl:category_even      sl:search_radius          

I0509 04:19:05.961334 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.620


Testing [30/30]


I0509 04:19:06.195799 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.620


Reading ../data/train.tsv


I0509 04:19:14.894891 139936780654464 interactiveshell.py:2882] Step 6850 | Loss: 0.1736 | Spent: 806.1 secs | LR: 0.000302
I0509 04:19:54.473546 139936780654464 interactiveshell.py:2882] Step 6900 | Loss: 0.1553 | Spent: 39.6 secs | LR: 0.000302
I0509 04:20:36.239653 139936780654464 interactiveshell.py:2882] Step 6950 | Loss: 0.1537 | Spent: 41.8 secs | LR: 0.000301
I0509 04:21:15.201652 139936780654464 interactiveshell.py:2882] Step 7000 | Loss: 0.2013 | Spent: 39.0 secs | LR: 0.000301
I0509 04:21:55.306153 139936780654464 interactiveshell.py:2882] Step 7050 | Loss: 0.1566 | Spent: 40.1 secs | LR: 0.000300
I0509 04:22:34.288178 139936780654464 interactiveshell.py:2882] Step 7100 | Loss: 0.0897 | Spent: 39.0 secs | LR: 0.000299
I0509 04:23:14.328056 139936780654464 interactiveshell.py:2882] Step 7150 | Loss: 0.1597 | Spent: 40.0 secs | LR: 0.000299
I0509 04:23:53.374871 139936780654464 interactiveshell.py:2882] Step 7200 | Loss: 0.1966 | Spent: 39.0 secs | LR: 0.000298
I0509 04:24:33.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are [ sl:category_event the nutcracker show ] playing [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ] ]

                       in:get_event                                                                        
  __________________________|_____________________________________________________                          
 |     |    |     |                        |                                 sl:location                   
 |     |    |     |                        |                                      |                         
 |     |    |     |                        |                               in:get_location                 
 |     |    |     |                        |                      ________________|_______________          
 |     |    |     |                 sl:category_even      sl:search_radius          

I0509 04:44:52.764472 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.631


Testing [30/30]


I0509 04:44:52.980278 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.631


Reading ../data/train.tsv


I0509 04:45:19.250391 139936780654464 interactiveshell.py:2882] Step 7850 | Loss: 0.2583 | Spent: 807.7 secs | LR: 0.000290
I0509 04:45:59.765200 139936780654464 interactiveshell.py:2882] Step 7900 | Loss: 0.1241 | Spent: 40.5 secs | LR: 0.000290
I0509 04:46:39.671032 139936780654464 interactiveshell.py:2882] Step 7950 | Loss: 0.1335 | Spent: 39.9 secs | LR: 0.000289
I0509 04:47:19.019146 139936780654464 interactiveshell.py:2882] Step 8000 | Loss: 0.0924 | Spent: 39.3 secs | LR: 0.000289
I0509 04:47:57.221684 139936780654464 interactiveshell.py:2882] Step 8050 | Loss: 0.0763 | Spent: 38.2 secs | LR: 0.000288
I0509 04:48:36.619650 139936780654464 interactiveshell.py:2882] Step 8100 | Loss: 0.0971 | Spent: 39.4 secs | LR: 0.000287
I0509 04:49:14.789509 139936780654464 interactiveshell.py:2882] Step 8150 | Loss: 0.1108 | Spent: 38.2 secs | LR: 0.000287
I0509 04:49:54.099240 139936780654464 interactiveshell.py:2882] Step 8200 | Loss: 0.1315 | Spent: 39.3 secs | LR: 0.000286
I0509 04:50:32.

------------
minimal test
utterance: what times are the nutcracker show playing near me
parsed: [ in:get_event what times are [ sl:category_event the nutcracker show ] playing [ sl:location [ in:get_location [ sl:search_radius near ] [ sl:location_user me ] ] ]

------------
Reading ../data/test.tsv
Testing [0/30]
Testing [1/30]
Testing [2/30]
Testing [3/30]
Testing [4/30]
Testing [5/30]
Testing [6/30]
Testing [7/30]
Testing [8/30]
Testing [9/30]
Testing [10/30]
Testing [11/30]
Testing [12/30]
Testing [13/30]
Testing [14/30]
Testing [15/30]
Testing [16/30]
Testing [17/30]
Testing [18/30]
Testing [19/30]
Testing [20/30]
Testing [21/30]
Testing [22/30]
Testing [23/30]
Testing [24/30]
Testing [25/30]
Testing [26/30]
Testing [27/30]
Testing [28/30]
Testing [29/30]


I0509 05:10:14.594712 139936780654464 interactiveshell.py:2882] Evaluation: Testing Exact Match Accuracy: 0.618
I0509 05:10:14.595732 139936780654464 interactiveshell.py:2882] Best Accuracy: 0.631


Testing [30/30]
Reading ../data/train.tsv


I0509 05:10:57.132561 139936780654464 interactiveshell.py:2882] Step 8850 | Loss: 0.0911 | Spent: 795.3 secs | LR: 0.000279


KeyboardInterrupt: ignored