In [1]:
"""
The following lines are needed because this notebook runs on Google Colab.
If you are running the notebook on a local computer, you don't need this cell.
"""
# Mount Google Drive inside the Colab VM so files stored there become reachable.
from google.colab import drive
drive.mount('/content/gdrive')
import os
# Switch the working directory to the project's `main` folder; the relative
# paths used later in this notebook (e.g. '../vocab/...', '../model/...')
# are resolved from here.
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/free_chat/chinese_gaoq1/main')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
%tensorflow_version 1.x

In [3]:
import tensorflow as tf
import numpy as np

# Report the TensorFlow version and GPU availability so the recorded cell
# output documents the runtime this notebook was executed on.
print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())

TensorFlow Version 1.15.0
GPU Enabled: False


In [0]:
def rnn_cell():
    """Build the decoder's recurrent cell from the global ``params``.

    With ``params['dec_layers']`` of 1 (or less) a single LSTM cell is
    returned.  Otherwise a ``MultiRNNCell`` is returned in which every layer
    except the last is wrapped in a ``ResidualWrapper`` whose residual
    function concatenates the layer input with the layer output along the
    last axis; the last layer is a plain LSTM cell.
    """
    num_layers = params['dec_layers']

    def make_lstm():
        # Each call yields a fresh LSTM cell with orthogonal initialisation.
        return tf.nn.rnn_cell.LSTMCell(
            params['rnn_units'],
            initializer=tf.orthogonal_initializer())

    # Single-layer decoder: no stacking required.
    if not num_layers > 1:
        return make_lstm()

    def concat_residual(inputs, outputs):
        # "Residual" here means concatenation, not addition.
        return tf.concat((inputs, outputs), -1)

    # All layers but the last carry the concatenating residual connection.
    stack = [
        tf.nn.rnn_cell.ResidualWrapper(make_lstm(), residual_fn=concat_residual)
        for _ in range(num_layers - 1)
    ]
    stack.append(make_lstm())
    return tf.nn.rnn_cell.MultiRNNCell(stack)

  
def dec_cell(enc_out, enc_seq_len):
    """Wrap the decoder RNN cell with Bahdanau attention over the encoder output.

    Args:
        enc_out: encoder outputs, used as the attention memory.
        enc_seq_len: per-example source lengths, passed as
            ``memory_sequence_length`` of the attention mechanism.

    Returns:
        A ``tf.contrib.seq2seq.AttentionWrapper`` around ``rnn_cell()`` whose
        attention layer size is ``params['rnn_units']``.
    """
    units = params['rnn_units']

    # The attention mechanism is created before the wrapped cell.
    attention = tf.contrib.seq2seq.BahdanauAttention(
        num_units=units,
        memory=enc_out,
        memory_sequence_length=enc_seq_len)

    wrapped_cell = tf.contrib.seq2seq.AttentionWrapper(
        cell=rnn_cell(),
        attention_mechanism=attention,
        attention_layer_size=units)
    return wrapped_cell

In [0]:
class TiedDense(tf.layers.Layer):
  """Output projection whose kernel is an externally supplied matrix.

  Instead of owning a kernel, the layer multiplies its inputs by the
  transpose of ``tied_embed`` and adds a bias of length ``out_dim`` that it
  creates itself.

  Args:
    tied_embed: matrix used (transposed) as the projection kernel.
      Presumably shaped ``[out_dim, depth]`` where ``depth`` matches the last
      dimension of the inputs -- verify against the caller.
    out_dim: size of the output's last dimension; also the bias length.
  """

  def __init__(self, tied_embed, out_dim):
    super().__init__()
    self.tied_embed = tied_embed
    self.out_dim = out_dim

  def build(self, input_shape):
    """Create the bias, the only weight this layer adds itself."""
    self.bias = self.add_weight(name='bias',
                                shape=[self.out_dim],
                                trainable=True)
    super().build(input_shape)

  def call(self, inputs):
    """Return ``inputs @ tied_embed^T + bias``."""
    x = tf.matmul(inputs, self.tied_embed, transpose_b=True)
    x = tf.nn.bias_add(x, self.bias)
    return x

  def compute_output_shape(self, input_shape):
    """Return ``input_shape`` with its last dimension replaced by ``out_dim``.

    ``input_shape`` may be a ``TensorShape`` or a plain list/tuple of
    dimensions; it is coerced to ``TensorShape`` first so that slicing and
    ``concatenate`` work for both.
    """
    input_shape = tf.TensorShape(input_shape)
    return input_shape[:-1].concatenate(self.out_dim)

In [0]:
def forward(features, labels, mode):
    """Build the inference graph: embed, encode, then beam-search decode.

    Reads hyper-parameters from the module-level ``params`` dict.

    Args:
        features: either a dict holding the input ids under ``'words'`` or
            the ids tensor itself. Presumably a [batch, time] int tensor in
            which id 0 is padding (lengths are the count of non-zero ids) --
            TODO confirm against the data pipeline.
        labels: not used by this function.
        mode: a ``tf.estimator.ModeKeys`` value; only used to compute
            ``is_training``, which is itself not used below.

    Returns:
        ``predicted_ids`` from beam-search decoding, sliced along the last
        axis to its first ``params['top_k']`` entries.
    """
    if isinstance(features, dict):
      words = features['words']
    else:
      words = features
    
    # Sequence length = number of non-zero ids in each row.
    words_len = tf.count_nonzero(words, 1, dtype=tf.int32)
    
    # NOTE(review): is_training is computed but never read in this function.
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    batch_sz = tf.shape(words)[0]
    
    
    with tf.variable_scope('Embedding'):
        # The embedding matrix is initialised from a NumPy file on disk.
        embedding = tf.Variable(np.load('../vocab/char.npy'),
                                dtype=tf.float32,
                                name='fasttext_vectors')
        x = tf.nn.embedding_lookup(embedding, words)
    
    
    with tf.variable_scope('Encoder'):
        t = tf.transpose(x, perm=[1, 0, 2])  # Need time-major
        # Bidirectional encoder built from two fused LSTMs; the backward one
        # is wrapped so that it processes the sequence in reverse time order.
        lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['rnn_units'])
        lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['rnn_units'])
        lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
        o_fw, s_fw = lstm_cell_fw(t, dtype=tf.float32, sequence_length=words_len)
        o_bw, s_bw = lstm_cell_bw(t, dtype=tf.float32, sequence_length=words_len)
        # Join both directions on the feature axis, then undo the transpose.
        enc_out = tf.concat([o_fw, o_bw], axis=-1)
        enc_out = tf.transpose(enc_out, perm=[1, 0, 2])

        # Project the concatenated final hidden states down to rnn_units and
        # use the result as both c and h of the decoder's initial LSTM state.
        enc_state = tf.layers.dense(tf.concat((s_fw.h, s_bw.h), -1), params['rnn_units'], params['activation'], name='state_fc')
        enc_state = tf.nn.rnn_cell.LSTMStateTuple(c=enc_state, h=enc_state)
        if params['dec_layers'] > 1:
          # A multi-layer decoder gets the same state for every layer.
          enc_state = tuple(params['dec_layers'] * [enc_state])
    
    
    with tf.variable_scope('Decoder'):
        # Output projection shares its kernel with the embedding matrix; the
        # output size is the vocabulary size plus one.
        # NOTE(review): this appears to require the embedding width to equal
        # the decoder output width (params['rnn_units']) -- confirm.
        output_proj = TiedDense(embedding, len(params['char2idx'])+1)
        
        # Replicate encoder tensors beam_width times for beam search.
        enc_out_t = tf.contrib.seq2seq.tile_batch(enc_out, params['beam_width'])
        enc_state_t = tf.contrib.seq2seq.tile_batch(enc_state, params['beam_width'])
        enc_seq_len_t = tf.contrib.seq2seq.tile_batch(words_len, params['beam_width'])
        
        cell = dec_cell(enc_out_t, enc_seq_len_t)
        
        # Start from the attention wrapper's zero state, with the inner cell
        # state replaced by the (tiled) encoder state.
        init_state = cell.zero_state(batch_sz*params['beam_width'], tf.float32).clone(
            cell_state=enc_state_t)
        
        # Decoding starts from token id 1 and stops at token id 2.
        decoder = tf.contrib.seq2seq.BeamSearchDecoder(
            cell = cell,
            embedding = embedding,
            start_tokens = tf.tile(tf.constant([1], tf.int32), [batch_sz]),
            end_token = 2,
            initial_state = init_state,
            beam_width = params['beam_width'],
            output_layer = output_proj,
            length_penalty_weight = params['length_penalty'],
            coverage_penalty_weight = params['coverage_penalty'],)
        decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder = decoder,
            maximum_iterations = params['max_len'],)
        
        # Keep only the first top_k entries along the last axis.
        return decoder_output.predicted_ids[:, :, :params['top_k']]

In [0]:
def model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` for this export-only notebook.

    Only ``tf.estimator.ModeKeys.PREDICT`` is supported: the graph built by
    ``forward`` always performs beam-search decoding and defines no loss or
    train op.

    Args:
        features: input features, forwarded to ``forward``.
        labels: forwarded to ``forward``.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: accepted as part of the Estimator ``model_fn`` signature but
            not read here; ``forward`` uses the module-level ``params`` dict.

    Returns:
        A ``tf.estimator.EstimatorSpec`` whose predictions are the decoded ids.

    Raises:
        ValueError: if ``mode`` is not PREDICT. Previously the function fell
            through and returned ``None`` for other modes.
    """
    # Reject unsupported modes before any graph is built.
    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError(
            'model_fn only supports PREDICT mode, got mode={!r}'.format(mode))

    predicted_ids = forward(features, labels, mode)
    return tf.estimator.EstimatorSpec(mode, predictions=predicted_ids)

In [0]:
# Hyper-parameters and paths shared by every cell of this notebook.
params = dict(
    # Filesystem locations (relative to the working directory).
    model_dir='../model/lstm_seq2seq',
    export_dir='../model/lstm_seq2seq_export',
    vocab_path='../vocab/char.txt',
    # Network sizes.
    rnn_units=300,
    max_len=10,            # cap on the number of decoding steps
    activation=tf.nn.relu,
    dec_layers=1,
    # Beam-search settings.
    beam_width=5,
    top_k=3,               # how many beams are kept in the returned ids
    length_penalty=0.6,
    coverage_penalty=0.0,
)

In [0]:
def serving_input_receiver_fn():
    """Describe the serving-time input of the exported model.

    A single int32 placeholder named ``'words'`` with two unknown dimensions
    is exposed; the same dict serves as both the model features and the
    receiver tensors.
    """
    word_ids = tf.placeholder(dtype=tf.int32, shape=[None, None], name='words')
    inputs = {'words': word_ids}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)

In [0]:
def get_vocab(f_path):
  """Read a vocabulary file into a ``token -> index`` dict.

  The file is expected to hold one token per line; a token's index is its
  zero-based line number. If a token occurs more than once, the index of its
  last occurrence wins.

  Args:
    f_path: path of the vocabulary text file.

  Returns:
    dict mapping each line (without its trailing newline) to its line index.
  """
  # Decode explicitly as UTF-8: the vocabulary contains non-ASCII characters,
  # and the platform default encoding is not guaranteed to be UTF-8.
  with open(f_path, encoding='utf-8') as f:
    return {line.rstrip('\n'): i for i, line in enumerate(f)}

In [0]:
# Load the character vocabulary, then derive the reverse (index -> character) map.
params['char2idx'] = get_vocab(params['vocab_path'])
params['idx2char'] = dict(
    (idx, char) for char, idx in params['char2idx'].items())

In [12]:
# Build an Estimator on top of the model directory and export it as a
# SavedModel; the export call is the last expression so its return value is
# shown as the cell output.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=params['model_dir'])
estimator.export_saved_model(
    export_dir_base=params['export_dir'],
    serving_input_receiver_fn=serving_input_receiver_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '../model/lstm_seq2seq', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fcf98f33ac8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
Instructions for 

b'../model/lstm_seq2seq_export/1577431850'