In [None]:
# Mount Google Drive inside the Colab VM so the project files are reachable.
from google.colab import drive
drive.mount('/content/gdrive')
import os
# Every relative path used later ('../vocab', '../model') is resolved from
# this working directory.
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/free_chat/chinese_lccc/main')

In [None]:
%tensorflow_version 1.x
!pip install texar

In [None]:
import tensorflow as tf
import texar.tf as tx
import numpy as np
import copy

from texar.tf.modules import TransformerEncoder

# Report the runtime environment: TensorFlow version and GPU visibility.
print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())

TensorFlow Version 1.15.2
GPU Enabled: False


In [None]:
def rnn_cell():
  """Build the decoder's recurrent cell from the global `params`.

  Returns a single LSTM cell when `params['dec_layers']` is 1 (or less).
  Otherwise returns a `MultiRNNCell` in which every layer except the last is
  wrapped so that its output is the concatenation of its input and its LSTM
  output along the last axis; the last layer is a plain LSTM cell.
  """
  num_layers = params['dec_layers']

  def make_lstm():
    # Each layer gets its own orthogonal initializer instance.
    return tf.nn.rnn_cell.LSTMCell(
        params['rnn_units'], initializer=tf.orthogonal_initializer())

  # Guard clause: a single layer needs no stacking.
  if not num_layers > 1:
    return make_lstm()

  def concat_residual(layer_in, layer_out):
    # "Residual" here means concatenation, not addition.
    return tf.concat((layer_in, layer_out), axis=-1)

  # Cells are constructed strictly bottom-up, exactly one LSTM per layer.
  stack = [
      tf.nn.rnn_cell.ResidualWrapper(make_lstm(), residual_fn=concat_residual)
      for _ in range(num_layers - 1)
  ]
  stack.append(make_lstm())
  return tf.nn.rnn_cell.MultiRNNCell(stack)

  
def dec_cell(enc_out, enc_seq_len):
  """Wrap the decoder RNN cell with Bahdanau attention over the encoder output.

  Args:
    enc_out: attention memory (the encoder outputs).
    enc_seq_len: valid length of each memory sequence, used for masking.

  Returns:
    A `tf.contrib.seq2seq.AttentionWrapper` around `rnn_cell()`, whose
    attention layer has `params['rnn_units']` units.
  """
  units = params['rnn_units']

  # The attention mechanism is created before the RNN cell on purpose: the
  # construction order is kept identical to the original graph.
  attention = tf.contrib.seq2seq.BahdanauAttention(
      units, enc_out, memory_sequence_length=enc_seq_len)
  base_cell = rnn_cell()

  return tf.contrib.seq2seq.AttentionWrapper(
      base_cell, attention, attention_layer_size=units)

In [None]:
class TiedDense(tf.layers.Layer):
  """Dense output projection whose kernel is a shared (tied) embedding matrix.

  Computes `matmul(inputs, tied_embed, transpose_b=True) + bias`. The only
  weight owned by this layer is the bias; the kernel is whatever tensor is
  passed as `tied_embed`.

  Args:
    tied_embed: tensor used (transposed) as the projection kernel.
      Assumed to have `out_dim` rows -- TODO confirm against the caller.
    out_dim: size of the last output dimension (and of the bias).
  """

  def __init__(self, tied_embed, out_dim):
    super().__init__()
    self.tied_embed = tied_embed
    self.out_dim = out_dim

  def build(self, input_shape):
    """Create the bias vector of length `out_dim`."""
    # No initializer is passed, so the base layer's default one is used.
    self.bias = self.add_weight(name='bias',
                                shape=[self.out_dim],
                                trainable=True)
    super().build(input_shape)

  def call(self, inputs):
    """Project `inputs` onto the tied embedding rows and add the bias."""
    x = tf.matmul(inputs, self.tied_embed, transpose_b=True)
    x = tf.nn.bias_add(x, self.bias)
    return x

  def compute_output_shape(self, input_shape):
    """Return `input_shape` with its last dimension replaced by `out_dim`.

    `input_shape` may be a `tf.TensorShape` or anything convertible to one
    (list / tuple of ints or None); it is coerced first because plain
    sequences have no `concatenate` method.
    """
    input_shape = tf.TensorShape(input_shape)
    return input_shape[:-1].concatenate(self.out_dim)

In [None]:
def forward(features, labels, mode):
    """Build the inference graph: Transformer encoder + attentional LSTM beam search.

    Reads the module-level `params` and `config_model`, and loads the
    embedding matrix from '../vocab/char.npy'.

    Args:
        features: either a dict holding the token-id tensor under 'words', or
            that tensor itself. Id 0 is treated as padding (lengths are the
            per-row count of non-zero ids and embedding row 0 is zeroed).
        labels: unused.
        mode: unused; the encoder is always built in PREDICT mode.

    Returns:
        `predicted_ids[:, :, :params['top_k']]` from the beam-search decoder,
        i.e. the first `top_k` beams of every decoded sequence.
    """
    words = features['words'] if isinstance(features, dict) else features
    # Sequence length = number of non-zero (non-padding) ids in each row.
    words_len = tf.count_nonzero(words, 1, dtype=tf.int32)

    batch_sz = tf.shape(words)[0]

    # NOTE: statement order and variable scopes below determine the variable
    # names in the graph; keep them unchanged so checkpoints in
    # params['model_dir'] can still be restored.
    with tf.variable_scope('Embedding'):
        embedding = tf.Variable(np.load('../vocab/char.npy'),
                                dtype=tf.float32,
                                name='fasttext_vectors')
        # Replace row 0 (padding id) with a constant zero vector.
        embedding = tf.concat([tf.zeros(shape=[1, params['embed_dim']]), embedding[1:, :]], axis=0)
        x = tf.nn.embedding_lookup(embedding, words)
        pos_embedder = tx.modules.SinusoidsPositionEmbedder(
            position_size = params['max_len'] + 1,
            hparams = config_model.position_embedder_hparams)
        # Scale token embeddings by sqrt(hidden_dim), then add position embeddings.
        x = (x * config_model.hidden_dim ** 0.5) + pos_embedder(sequence_length=words_len)


    with tf.variable_scope('Encoder'):
        encoder = TransformerEncoder(hparams=config_model.encoder)
        # Hard-coded PREDICT mode: this graph is only ever used for inference.
        enc_out = encoder(inputs=x, sequence_length=words_len, mode=tf.estimator.ModeKeys.PREDICT)
        # Max over axis 1 of the encoder output seeds both the c and h LSTM states.
        enc_state = tf.reduce_max(enc_out, axis=1)
        enc_state = tf.nn.rnn_cell.LSTMStateTuple(c=enc_state, h=enc_state)


    with tf.variable_scope('Decoder'):
        # Output logits reuse the embedding matrix as projection kernel.
        # NOTE(review): out_dim is vocabulary size + 1 -- presumably char.npy
        # has one more row than char.txt has lines; confirm.
        output_proj = TiedDense(embedding, len(params['char2idx'])+1)

        # Beam search needs every encoder tensor repeated beam_width times.
        enc_out_t = tf.contrib.seq2seq.tile_batch(enc_out, params['beam_width'])
        enc_state_t = tf.contrib.seq2seq.tile_batch(enc_state, params['beam_width'])
        enc_seq_len_t = tf.contrib.seq2seq.tile_batch(words_len, params['beam_width'])

        cell = dec_cell(enc_out_t, enc_seq_len_t)

        # Start from the attention wrapper's zero state, but with the RNN cell
        # state replaced by the tiled encoder summary.
        init_state = cell.zero_state(batch_sz*params['beam_width'], tf.float32).clone(
            cell_state=enc_state_t)

        decoder = tf.contrib.seq2seq.BeamSearchDecoder(
            cell = cell,
            embedding = embedding,
            # Every sequence starts with token id 1 and stops at token id 2.
            start_tokens = tf.tile(tf.constant([1], tf.int32), [batch_sz]),
            end_token = 2,
            initial_state = init_state,
            beam_width = params['beam_width'],
            output_layer = output_proj,
            length_penalty_weight = params['length_penalty'],)
        decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder = decoder,
            maximum_iterations = params['max_len'],)

        # Keep only the first top_k beams along the last axis.
        return decoder_output.predicted_ids[:, :, :params['top_k']]

In [None]:
def model_fn(features, labels, mode, params):
    """Estimator `model_fn` for the inference-only (export) graph.

    Args:
        features: forwarded to `forward`.
        labels: forwarded to `forward`.
        mode: a `tf.estimator.ModeKeys` value; only PREDICT is supported.
        params: accepted to satisfy the Estimator signature; not read here.

    Returns:
        A `tf.estimator.EstimatorSpec` whose predictions are the ids returned
        by `forward`.

    Raises:
        ValueError: if `mode` is not PREDICT. Previously the function fell
            through and returned None, which the Estimator only rejected
            after the whole graph had been built.
    """
    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError(
            'model_fn only supports PREDICT mode, got: {!r}'.format(mode))
    predicted_ids = forward(features, labels, mode)
    return tf.estimator.EstimatorSpec(mode, predictions=predicted_ids)

In [None]:
class config_model:
    """Hyperparameters for the position embedder and the Transformer encoder."""

    # Scalars shared by the hparams dictionaries below.
    hidden_dim = 300
    num_heads = 8
    dropout_rate = .2
    num_blocks = 6

    # Passed as `hparams` to the sinusoidal position embedder.
    position_embedder_hparams = dict(dim=hidden_dim)

    # Passed as `hparams` to the Transformer encoder.
    encoder = dict(
        dim=hidden_dim,
        embedding_dropout=dropout_rate,
        residual_dropout=dropout_rate,
        num_blocks=num_blocks,
        initializer=dict(
            type='variance_scaling_initializer',
            kwargs=dict(scale=1.0, mode='fan_avg', distribution='uniform'),
        ),
        multihead_attention=dict(
            dropout_rate=dropout_rate,
            num_heads=num_heads,
            output_dim=hidden_dim,
            use_bias=True,
        ),
        # Position-wise feed-forward: Dense(4 * hidden, gelu) -> Dropout -> Dense(hidden).
        poswise_feedforward=dict(
            name='fnn',
            layers=[
                dict(
                    type='Dense',
                    kwargs=dict(
                        name='conv1',
                        units=hidden_dim * 4,
                        activation='gelu',
                        use_bias=True,
                    ),
                ),
                dict(
                    type='Dropout',
                    kwargs=dict(rate=dropout_rate),
                ),
                dict(
                    type='Dense',
                    kwargs=dict(
                        name='conv2',
                        units=hidden_dim,
                        use_bias=True,
                    ),
                ),
            ],
        ),
    )


# Global run configuration, read by the graph-building functions and the
# export cell of this notebook.
params = dict(
    # Filesystem locations, relative to the notebook's working directory.
    model_dir='../model/transformer_rnn',
    export_dir='../model/transformer_rnn_export',
    vocab_path='../vocab/char.txt',
    # Decoder architecture.
    dec_layers=1,
    rnn_units=300,
    max_len=30,
    embed_dim=config_model.hidden_dim,
    # Beam-search decoding.
    beam_width=10,
    top_k=3,
    length_penalty=0.0,
    coverage_penalty=0.0,
)

In [None]:
def serving_input_receiver_fn():
    """Describe the serving signature: a single int32 'words' input.

    Returns:
        A `ServingInputReceiver` whose features and receiver tensors are the
        same one-entry dict mapping 'words' to an int32 placeholder of shape
        [None, None].
    """
    token_ids = tf.placeholder(dtype=tf.int32, shape=[None, None], name='words')
    # The raw placeholder is fed to the model as-is, so one dict serves as
    # both the features and the receiver tensors.
    inputs = dict(words=token_ids)
    return tf.estimator.export.ServingInputReceiver(
        features=inputs, receiver_tensors=inputs)


def get_vocab(f_path):
  """Read a vocabulary file into a token -> index mapping.

  The file holds one token per line; a token's index is its 0-based line
  number. If a token occurs more than once, the last occurrence wins.

  Args:
    f_path: path to the vocabulary text file.

  Returns:
    dict mapping each line (without its trailing newline) to its line index.
  """
  # Decode explicitly as UTF-8: the vocabulary contains Chinese characters and
  # the platform default encoding is not guaranteed to be UTF-8.
  with open(f_path, encoding='utf-8') as f:
    return {line.rstrip('\n'): i for i, line in enumerate(f)}

In [None]:
# Load the vocabulary and its inverse (index -> character) into `params`.
params['char2idx'] = get_vocab(params['vocab_path'])
params['idx2char'] = dict(
    (idx, char) for char, idx in params['char2idx'].items())

# Build an Estimator over the model directory and export it as a SavedModel
# under params['export_dir'].
estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir=params['model_dir'])
estimator.export_saved_model(
    export_dir_base=params['export_dir'],
    serving_input_receiver_fn=serving_input_receiver_fn)



b'../model/transformer_rnn_export/1597653320'