In [None]:
# Colab-only setup: mount Google Drive and make the project's `main` folder the
# working directory, because later cells use paths relative to it
# ('../vocab/...', '../model/...').
import os

from google.colab import drive

drive.mount('/content/gdrive')
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/free_chat/chinese_lccc/main')

In [2]:
# Colab line magic: select the TensorFlow 1.x runtime before TensorFlow is
# imported (the cells below rely on tf.contrib and tf.placeholder).
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [3]:
import tensorflow as tf
import numpy as np

from modified_beam_search_decoder import BeamSearchDecoder

# Record which TensorFlow build is running and whether it can see a GPU.
print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())

TensorFlow Version 1.15.2
GPU Enabled: False


In [4]:
def rnn_cell():
  """Build the decoder's recurrent cell from the global `params`.

  Returns a single LSTMCell when params['dec_layers'] is 1 or less. Otherwise
  returns a MultiRNNCell in which every layer except the last is wrapped in a
  ResidualWrapper whose residual function concatenates the layer's input and
  output along the last axis; the last layer is a plain LSTMCell.
  """
  def build_lstm():
    # A fresh orthogonal initializer is created for every cell.
    return tf.nn.rnn_cell.LSTMCell(params['rnn_units'],
                                   initializer=tf.orthogonal_initializer())

  def concat_residual(cell_in, cell_out):
    # "Residual" here means concatenation, not addition.
    return tf.concat((cell_in, cell_out), -1)

  n_layers = params['dec_layers']
  if n_layers <= 1:
    return build_lstm()

  # Lower layers are created first, then the top layer, bottom-up.
  lower_layers = [
    tf.nn.rnn_cell.ResidualWrapper(build_lstm(), residual_fn=concat_residual)
    for _ in range(n_layers - 1)
  ]
  return tf.nn.rnn_cell.MultiRNNCell(lower_layers + [build_lstm()])

  
def dec_cell(enc_out, enc_seq_len):
  """Wrap the decoder RNN cell with Bahdanau attention over the encoder output.

  Args:
    enc_out: tensor used as the attention memory.
    enc_seq_len: per-example lengths passed as `memory_sequence_length`.

  Returns:
    A tf.contrib.seq2seq.AttentionWrapper around `rnn_cell()`; both the
    attention mechanism and the attention layer use params['rnn_units'] units.
  """
  units = params['rnn_units']
  attention = tf.contrib.seq2seq.BahdanauAttention(
    num_units=units,
    memory=enc_out,
    memory_sequence_length=enc_seq_len)
  base_cell = rnn_cell()
  return tf.contrib.seq2seq.AttentionWrapper(
    cell=base_cell,
    attention_mechanism=attention,
    attention_layer_size=units)

In [5]:
class TiedDense(tf.layers.Layer):
  """Projection layer whose weight matrix is supplied by the caller.

  The layer only creates a bias vector of its own; the projection multiplies
  the inputs by the transpose of `tied_embed`. The result is passed through a
  softmax, so `call` returns probabilities rather than logits.
  """

  def __init__(self, tied_embed, out_dim):
    """Store the shared matrix and the size of the output's last axis."""
    super().__init__()
    self.tied_embed = tied_embed
    self.out_dim = out_dim

  def build(self, input_shape):
    """Create the bias, the only weight added by this layer."""
    self.bias = self.add_weight(
      name='bias', shape=[self.out_dim], trainable=True)
    super().build(input_shape)

  def call(self, inputs):
    """Return softmax(inputs x tied_embed^T + bias)."""
    scores = tf.nn.bias_add(
      tf.matmul(inputs, self.tied_embed, transpose_b=True), self.bias)
    return tf.nn.softmax(scores)

  def compute_output_shape(self, input_shape):
    """Replace the last input dimension with `out_dim`."""
    leading_dims = input_shape[:-1]
    return leading_dims.concatenate(self.out_dim)

In [6]:
class Pointer(tf.layers.Layer):
  """Copy distribution: scatters attention over encoder positions onto vocab ids.

  Each decoder input is scored against every row of `encoder_out`; the softmax
  of those scores is written into a vocabulary-sized vector at the ids found in
  `encoder_ids` (tf.scatter_nd accumulates the values of repeated ids).
  """

  def __init__(self, vocab_size, encoder_ids, encoder_out, is_beam_search):
    """Keep references to the encoder tensors and the decoding mode flag."""
    super().__init__()
    self.encoder_ids = encoder_ids
    self.encoder_out = encoder_out
    self.vocab_size = vocab_size
    self.is_beam_search = is_beam_search

  def call(self, inputs):
    """Return the copy distribution for `inputs`.

    With beam search the inputs are flattened to
    (batch * beam_width, rnn_units) before scoring and the result is reshaped
    to (batch, beam_width, vocab_size); otherwise the result is
    (batch, vocab_size).
    """
    src_len = tf.shape(self.encoder_ids)[1]
    n_examples = tf.shape(inputs)[0]
    if self.is_beam_search:
      n_rows = n_examples * params['beam_width']
    else:
      n_rows = n_examples
    queries = tf.reshape(inputs, (n_rows, params['rnn_units']))

    # Dot-product score of every encoder position against the query.
    # NOTE(review): the softmax covers every encoder position; no
    # sequence-length mask is applied in this layer.
    scores = tf.squeeze(
      tf.matmul(self.encoder_out, tf.expand_dims(queries, -1)), -1)
    copy_probs = tf.nn.softmax(scores)

    # Pair each encoder id with its row number to form (row, vocab_id) indices.
    row_ids = tf.tile(
      tf.expand_dims(tf.range(0, n_rows), axis=1), [1, src_len])
    scatter_idx = tf.stack([row_ids, self.encoder_ids], axis=2)

    vocab_dist = tf.scatter_nd(
      scatter_idx, copy_probs, (n_rows, self.vocab_size))
    if not self.is_beam_search:
      return vocab_dist
    return tf.reshape(
      vocab_dist, (n_examples, params['beam_width'], self.vocab_size))

  def compute_output_shape(self, input_shape):
    """Replace the last input dimension with `vocab_size`."""
    leading_dims = input_shape[:-1]
    return leading_dims.concatenate(self.vocab_size)

In [7]:
class OutputProj(tf.layers.Layer):
  """Mixes a generation distribution and a copy distribution with a learned gate.

  The generator is a TiedDense over `tied_embed`, the copier is a Pointer over
  the encoder tensors, and the gate is a bias-free sigmoid Dense with one unit.
  """

  def __init__(self, tied_embed, vocab_size, encoder_ids, encoder_out, is_beam_search):
    """Create the two sub-layers; the gate is created later in `build`."""
    super().__init__()
    self.generator = TiedDense(tied_embed, vocab_size)
    self.pointer = Pointer(vocab_size, encoder_ids, encoder_out, is_beam_search)
    self.vocab_size = vocab_size

  def build(self, input_shape):
    """Create the scalar gate layer."""
    self.gate_fc = tf.keras.layers.Dense(1, tf.sigmoid, use_bias=False)
    super().build(input_shape)

  def call(self, inputs):
    """Return gate * generator(inputs) + (1 - gate) * pointer(inputs)."""
    vocab_probs = self.generator(inputs)
    copy_probs = self.pointer(inputs)
    p_gen = self.gate_fc(inputs)
    generated_part = p_gen * vocab_probs
    copied_part = (1 - p_gen) * copy_probs
    return generated_part + copied_part

  def compute_output_shape(self, input_shape):
    """Replace the last input dimension with `vocab_size`."""
    leading_dims = input_shape[:-1]
    return leading_dims.concatenate(self.vocab_size)

In [8]:
def forward(features, labels, mode):
    """Build the inference graph: embed, encode with a BiLSTM, beam-search decode.

    Reads hyper-parameters from the module-level `params` dict.

    Args:
        features: either a dict holding the token-id tensor under 'words', or
            the token-id tensor itself.
        labels: not read by this function.
        mode: a tf.estimator.ModeKeys value; only used to compute
            `is_training`.

    Returns:
        `decoder_output.predicted_ids` restricted to the first
        params['top_k'] entries of its last axis.
    """
    words = features['words'] if isinstance(features, dict) else features
    # Number of non-zero ids in each row.
    words_len = tf.count_nonzero(words, 1, dtype=tf.int32)
    
    # NOTE(review): `is_training` is computed but never read below.
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    batch_sz = tf.shape(words)[0]
    # Sign of the ids, handed to the encoder as its mask.
    mask = tf.sign(words)
    
    
    with tf.variable_scope('Embedding'):
        # Initial values come from a .npy file resolved against the current
        # working directory.
        embedding = tf.Variable(np.load('../vocab/char.npy'),
                                dtype=tf.float32,
                                name='fasttext_vectors')
        x = tf.nn.embedding_lookup(embedding, words)
    
    
    with tf.variable_scope('Encoder'):
        encoder = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
          params['rnn_units'], return_state=True, return_sequences=True, zero_output_for_mask=True))
        enc_out, state_fw_h, state_fw_c, state_bw_h, state_bw_c = encoder(x, mask=mask)

        # Summary vector: max over axis 1 of the outputs plus both final h
        # states, projected down to rnn_units.
        enc_state = tf.concat((tf.reduce_max(enc_out, 1), state_fw_h, state_bw_h), axis=-1)
        enc_state = tf.layers.dense(enc_state, params['rnn_units'], params['activation'], name='state_fc')
        enc_out = tf.layers.dense(enc_out, params['rnn_units'], params['activation'], name='out_fc')
        # The same tensor seeds both the cell state and the hidden state.
        enc_state = tf.nn.rnn_cell.LSTMStateTuple(c=enc_state, h=enc_state)
        if params['dec_layers'] > 1:
          # Every decoder layer starts from the same encoder summary.
          enc_state = tuple(params['dec_layers'] * [enc_state])
    
    
    with tf.variable_scope('Decoder'):
        # Repeat every encoder-side tensor beam_width times for beam search.
        enc_id_t = tf.contrib.seq2seq.tile_batch(words, params['beam_width'])
        enc_out_t = tf.contrib.seq2seq.tile_batch(enc_out, params['beam_width'])
        enc_state_t = tf.contrib.seq2seq.tile_batch(enc_state, params['beam_width'])
        enc_seq_len_t = tf.contrib.seq2seq.tile_batch(words_len, params['beam_width'])
        
        cell = dec_cell(enc_out_t, enc_seq_len_t)
        # NOTE(review): the output size is len(char2idx)+1 — presumably one id
        # beyond the entries of the vocab file; confirm against char.npy.
        output_proj = OutputProj(embedding, len(params['char2idx'])+1, enc_id_t, enc_out_t, is_beam_search=True)
        
        # Zero attention state, with the cell state replaced by the tiled
        # encoder summary.
        init_state = cell.zero_state(batch_sz*params['beam_width'], tf.float32).clone(
            cell_state=enc_state_t)
        
        # Decoding starts from id 1 for every example and stops at id 2.
        decoder = BeamSearchDecoder(
            cell = cell,
            embedding = embedding,
            start_tokens = tf.tile(tf.constant([1], tf.int32), [batch_sz]),
            end_token = 2,
            initial_state = init_state,
            beam_width = params['beam_width'],
            output_layer = output_proj,
            length_penalty_weight = params['length_penalty'],
            coverage_penalty_weight = params['coverage_penalty'],)
        decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder = decoder,
            maximum_iterations = params['max_len'],)
        
        # Keep only the first top_k entries along the last axis.
        return decoder_output.predicted_ids[:, :, :params['top_k']]

In [9]:
def model_fn(features, labels, mode, params):
    """Estimator model function for this export-only notebook.

    Only PREDICT is implemented. The original fell through and returned None
    for any other mode, which the Estimator rejects only after the whole graph
    has been built; the unsupported mode is now reported up front.

    Args:
        features: passed through to `forward`.
        labels: passed through to `forward`.
        mode: a tf.estimator.ModeKeys value; must be PREDICT.
        params: supplied by the Estimator; not read here (`forward` uses the
            module-level `params` dict).

    Returns:
        A tf.estimator.EstimatorSpec whose predictions are the ids returned by
        `forward`.

    Raises:
        ValueError: if `mode` is not tf.estimator.ModeKeys.PREDICT.
    """
    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError(
            'model_fn only supports PREDICT mode, got: {!r}'.format(mode))
    logits_or_ids = forward(features, labels, mode)
    return tf.estimator.EstimatorSpec(mode, predictions=logits_or_ids)

In [10]:
# Paths and hyper-parameters read by the model-building and export cells.
params = dict(
    # Locations, relative to the current working directory.
    model_dir='../model/lstm_pointer',
    export_dir='../model/lstm_pointer_export',
    vocab_path='../vocab/char.txt',
    # Network size.
    rnn_units=300,
    # Upper bound on decoding steps (passed as `maximum_iterations`).
    max_len=30,
    activation=tf.nn.swish,
    dec_layers=1,
    # Beam search: beams kept per example, and how many of them are returned.
    beam_width=10,
    top_k=3,
    length_penalty=0.6,
    coverage_penalty=0.0,
)

In [11]:
def serving_input_receiver_fn():
    """Describe the serving input: one int32 placeholder named 'words'.

    Both dimensions of the placeholder are left unspecified. The same dict is
    used as the model features and as the receiver tensors.
    """
    inputs = {
        'words': tf.placeholder(dtype=tf.int32, shape=[None, None], name='words'),
    }
    return tf.estimator.export.ServingInputReceiver(
        features=inputs, receiver_tensors=inputs)


def get_vocab(f_path):
  """Read a vocabulary file with one token per line into a token -> index dict.

  The index is the zero-based line number. Only the trailing newline is
  stripped, so tokens keep any other whitespace. If a token appears more than
  once, the index of its last occurrence is kept.

  The file is decoded explicitly as UTF-8 instead of with the locale's default
  encoding, so the result does not depend on the machine it runs on.

  Args:
    f_path: path of the vocabulary file.

  Returns:
    dict mapping each line's text to its line index; empty for an empty file.
  """
  with open(f_path, encoding='utf-8') as f:
    return {line.rstrip('\n'): i for i, line in enumerate(f)}

In [12]:
# Load the character vocabulary and its inverse into the shared `params` dict,
# which the graph-building code reads.
params['char2idx'] = get_vocab(f_path=params['vocab_path'])
params['idx2char'] = dict(
    (index, token) for token, index in params['char2idx'].items())

# Export a SavedModel from the checkpoints under `model_dir`; the call is the
# last expression so the cell displays the export path.
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=params['model_dir'])
estimator.export_saved_model(
    export_dir_base=params['export_dir'],
    serving_input_receiver_fn=serving_input_receiver_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '../model/lstm_pointer', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3783ad5a90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
Instructions for 

b'../model/lstm_pointer_export/1599195155'