In [1]:
# Building a seq2seq model

In [2]:
# defining model's inputs

def model_inputs():
    """Create the placeholders that feed data and hyper-parameters into the graph.

    Returns:
        tuple: ``(inputs, targets, lr, keep_prob)`` where
            - inputs: int32 placeholder of shape [None, None], named 'input'
            - targets: int32 placeholder of shape [None, None], named 'target'
            - lr: float32 placeholder named 'learning_rate'
            - keep_prob: float32 placeholder named 'keep_prob' (used for dropout)
    """
    inputs = tf.placeholder(tf.int32, [None, None], name = 'input')
    targets = tf.placeholder(tf.int32, [None, None], name = 'target')
    lr = tf.placeholder(tf.float32, name = 'learning_rate')
    # Fixed typo: the original called the non-existent `tf.placehodler`.
    keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')

    return inputs, targets, lr, keep_prob

In [3]:
def processed_targets(targets, word2int, batch_size):
    """Prepare the target answers so they can be fed to the decoder.

    Every row gets the ``<SOS>`` token id prepended and its last column
    removed, so the result has the same number of columns as ``targets``.

    Args:
        targets: 2-D tensor of answer token ids, one answer per row.
        word2int: dictionary mapping tokens to integer ids; must contain '<SOS>'.
        batch_size: number of answers (rows) in the batch.

    Returns:
        The tensor ``[<SOS> column | targets without the last column]``.
    """
    # Column vector of shape [batch_size, 1] filled with the <SOS> id.
    left_side = tf.fill([batch_size, 1], word2int['<SOS>'])
    # All rows, every column except the last one. The original call folded the
    # strides into the `end` list and used 1 instead of -1 as the column bound.
    right_side = tf.strided_slice(targets, [0, 0], [batch_size, -1], [1, 1])
    # Fixed: the original assigned to `preprocesses_targets` but returned
    # `preprocessed_targets`, which raised a NameError.
    preprocessed_targets = tf.concat([left_side, right_side], 1)

    return preprocessed_targets

In [4]:
def encoder_rnn_layer(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_length):
    """Build the stacked bidirectional LSTM encoder and return its final state.

    Args:
        rnn_inputs: inputs handed to the RNN (passed as ``inputs`` to
            ``tf.nn.bidirectional_dynamic_rnn``).
        rnn_size: number of units of the LSTM cell.
        num_layers: how many times the dropout-wrapped cell is stacked.
        keep_prob: value used as ``input_keep_prob`` of the dropout wrapper.
        sequence_length: per-example sequence lengths forwarded to the RNN.

    Returns:
        The second value returned by ``tf.nn.bidirectional_dynamic_rnn``
        (the output states); the per-step outputs are discarded.
    """
    # A single LSTM cell with dropout applied on its inputs.
    dropout_cell = tf.contrib.rnn.DropoutWrapper(
        tf.contrib.rnn.BasicLSTMCell(rnn_size),
        input_keep_prob = keep_prob,
    )
    # The very same wrapped cell object is repeated once per layer.
    stacked_cell = tf.contrib.rnn.MultiRNNCell([dropout_cell for _ in range(num_layers)])

    # The stacked cell serves both directions; only the states are kept.
    outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw = stacked_cell,
        cell_bw = stacked_cell,
        sequence_length = sequence_length,
        inputs = rnn_inputs,
        dtype = tf.float32,
    )
    return final_states

In [5]:
# Decoder RNN Layer
# 1-Decoder training data
# 2-Decoder Cross validation set
# 3-Decoder RNN layer

# In the function below we make use of TensorFlow embeddings. An embedding is basically a vector representation of text.
# The text is first converted into integer ids; the embedding lookup then returns, for each id passed in, its vector,
# so that every single word is mapped to a point in the vector space.

def decode_training_set(encoder_state, decoder_cell, decoder_embedded_input, sequence_length, decoding_scope, output_function,
                      keep_prob, batch_size):
    """Run the attention decoder in training mode and project its outputs.

    Args:
        encoder_state: encoder state; its first element (``encoder_state[0]``)
            is handed to the training decoder function.
        decoder_cell: RNN cell of the decoder; its ``output_size`` sizes the
            attention tensors.
        decoder_embedded_input: embedded decoder inputs.
        sequence_length: sequence lengths forwarded to ``dynamic_rnn_decoder``.
        decoding_scope: variable scope used by ``dynamic_rnn_decoder``.
        output_function: callable applied to the (dropped-out) decoder output.
        keep_prob: keep probability passed to ``tf.nn.dropout``.
        batch_size: number of sequences in the batch.

    Returns:
        ``output_function`` applied to the decoder output after dropout.
    """
    # Fixed: RNN cells expose `output_size`; the original read the
    # non-existent attribute `decoder_cell.output`.
    num_units = decoder_cell.output_size

    # Zero-initialised attention states of shape [batch_size, 1, num_units].
    attention_state = tf.zeros([batch_size, 1, num_units])

    # attention_keys: keys to be compared with the target state
    # attention_values: values used to create the context vector
    # attention_score_function: computes the similarity between keys and target states
    # attention_construct_function: builds the attention state
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(
        attention_state, attention_option = 'bahdanau', num_units = num_units)

    training_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_train(encoder_state[0],
                                                                             attention_keys,
                                                                             attention_values,
                                                                             attention_score_function,
                                                                             attention_construct_function,
                                                                             name = 'attn_dec_train')

    # Only the decoder outputs are needed; the final state and final context
    # state returned alongside them are ignored.
    decoder_output, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                  training_decoder_function,
                                                                  decoder_embedded_input,
                                                                  sequence_length,
                                                                  scope = decoding_scope)

    # Apply dropout to the decoder output before the output projection.
    decoder_output_dropout = tf.nn.dropout(decoder_output, keep_prob)

    return output_function(decoder_output_dropout)
    

In [6]:
def decode_test_set(encoder_state, decoder_cell, decoder_embedding_matrix, sos_id, eos_id, maximum_length, num_words,
                   decoding_scope, output_function, keep_prob, batch_size):
    """Run the attention decoder in inference mode and return its predictions.

    Args:
        encoder_state: encoder state; its first element (``encoder_state[0]``)
            is handed to the inference decoder function.
        decoder_cell: RNN cell of the decoder; its ``output_size`` sizes the
            attention tensors.
        decoder_embedding_matrix: embedding matrix the inference function uses
            to embed the tokens it generates.
        sos_id: id of the start-of-sentence token.
        eos_id: id of the end-of-sentence token.
        maximum_length: maximum length of a generated answer.
        num_words: number of decoder symbols (size of the answers vocabulary).
        decoding_scope: variable scope used by ``dynamic_rnn_decoder``.
        output_function: projection applied by the inference decoder function.
        keep_prob: accepted for signature parity with ``decode_training_set``;
            not used here (no dropout is applied at inference).
        batch_size: number of sequences in the batch.

    Returns:
        The predictions produced by ``dynamic_rnn_decoder``.
    """
    # Fixed: RNN cells expose `output_size`; the original read the
    # non-existent attribute `decoder_cell.output`.
    num_units = decoder_cell.output_size

    # Zero-initialised attention states of shape [batch_size, 1, num_units].
    attention_state = tf.zeros([batch_size, 1, num_units])

    # attention_keys: keys to be compared with the target state
    # attention_values: values used to create the context vector
    # attention_score_function: computes the similarity between keys and target states
    # attention_construct_function: builds the attention state
    attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(
        attention_state, attention_option = 'bahdanau', num_units = num_units)

    test_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_inference(output_function,
                                                                             encoder_state[0],
                                                                             attention_keys,
                                                                             attention_values,
                                                                             attention_score_function,
                                                                             attention_construct_function,
                                                                             decoder_embedding_matrix,
                                                                             sos_id,
                                                                             eos_id, maximum_length, num_words,
                                                                             name = 'attn_dec_inf')

    # Fixed: the original passed `decoder_embedded_input`, which is not defined
    # in this function (NameError). In inference mode the decoder function
    # produces its own inputs, so no inputs are passed here.
    test_predictions, _, _ = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
                                                                    test_decoder_function,
                                                                    scope = decoding_scope)

    return test_predictions

In [12]:
# Building RNN decoder

def decoder_rnn_layer(decoder_embedded_input, decoder_embedding_matrix, encoder_state, num_words, sequence_length,
                     rnn_size, num_layers, word2int, keep_prob, batch_size):
    """Build the decoder and return its training and test predictions.

    Args:
        decoder_embedded_input: embedded inputs fed to the decoder in training.
        decoder_embedding_matrix: embedding matrix used by the decoder at inference.
        encoder_state: encoder state handed to both decoders.
        num_words: size of the output layer (number of words to predict over).
        sequence_length: sequence length; ``sequence_length - 1`` is used as the
            maximum answer length at inference, so it must support subtraction.
        rnn_size: number of units of the LSTM cell.
        num_layers: how many times the dropout-wrapped cell is stacked.
        word2int: dictionary mapping tokens to ids; must contain '<SOS>' and '<EOS>'.
        keep_prob: keep probability used for dropout.
        batch_size: number of sequences in the batch.

    Returns:
        tuple: ``(training_predictions, test_predictions)``.
    """
    with tf.variable_scope("decoding") as decoding_scope:
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        # Fixed typo: the original referenced `DropourtWrapper`.
        lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
        decoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers)

        weights = tf.truncated_normal_initializer(stddev = 0.1) # initializer for the output-layer weights
        # Fixed: the initializer is `tf.zeros_initializer`, not `tf.zeros.initializer`.
        biases = tf.zeros_initializer() # initializer for the output-layer biases

        # Linear (no activation) fully connected layer producing num_words outputs.
        # Fixed: the keyword is `weights_initializer` (the original had a trailing "s").
        output_function = lambda x: tf.contrib.layers.fully_connected(x,
                                                                     num_words,
                                                                     None,
                                                                     scope = decoding_scope,
                                                                     weights_initializer = weights,
                                                                     biases_initializer = biases)

        training_predictions = decode_training_set(encoder_state,
                                                  decoder_cell,
                                                  decoder_embedded_input,
                                                  sequence_length,
                                                  decoding_scope,
                                                  output_function,
                                                  keep_prob,
                                                  batch_size)

        # Let the inference decoder reuse the variables created for training.
        decoding_scope.reuse_variables()

        test_predictions = decode_test_set(encoder_state,
                                           decoder_cell,
                                           decoder_embedding_matrix,
                                           word2int['<SOS>'],
                                           word2int['<EOS>'],
                                           sequence_length - 1,
                                           num_words,
                                           decoding_scope,
                                           output_function,
                                           keep_prob,
                                           batch_size)

    return training_predictions, test_predictions

In [15]:
# SEQ2SEQ MODEL!

def seq2seq_model(inputs, targets, keep_prob, batch_size, sequence_length, answers_num_words, questions_num_words,
                 encoder_embedding_size, decoder_embedding_size = None, rnn_size = None, num_layers = None,
                 questionswords2int = None):
    """Assemble the encoder and the decoder into the full seq2seq model.

    Args:
        inputs: questions to the chatbot (token ids).
        targets: actual answers to the questions (token ids).
        keep_prob: keep probability used for dropout.
        batch_size: size of the batch.
        sequence_length: length of the sequences.
        answers_num_words: number of words in all the answers.
        questions_num_words: number of words in all the questions.
        encoder_embedding_size: number of dimensions of the encoder embedding.
        decoder_embedding_size: number of dimensions of the decoder embedding matrix.
        rnn_size: number of units of the LSTM cells.
        num_layers: number of stacked LSTM layers.
        questionswords2int: dictionary mapping question words to integer ids
            (must contain the '<SOS>' and '<EOS>' tokens).

        The last four arguments are optional for backward compatibility: when
        left as ``None`` the notebook-level variable of the same name is used,
        which is what the original implementation silently relied on.

    Returns:
        tuple: ``(training_predictions, test_predictions)``.

    Raises:
        NameError: if one of the optional arguments is omitted and no
            notebook-level variable of that name exists.
    """
    def _resolve(name, value):
        # An explicit argument wins; otherwise fall back to the notebook global.
        if value is not None:
            return value
        if name not in globals():
            raise NameError("name '%s' is not defined; pass it to seq2seq_model explicitly" % name)
        return globals()[name]

    decoder_embedding_size = _resolve('decoder_embedding_size', decoder_embedding_size)
    rnn_size = _resolve('rnn_size', rnn_size)
    num_layers = _resolve('num_layers', num_layers)
    questionswords2int = _resolve('questionswords2int', questionswords2int)

    # Embed the encoder inputs.
    # Fixed: `embed-sequence` parsed as a subtraction; the function is `embed_sequence`.
    # NOTE(review): the encoder vocabulary is sized with answers_num_words and the
    # decoder with questions_num_words, as in the original - confirm against the caller.
    encoder_embedded_inputs = tf.contrib.layers.embed_sequence(inputs, answers_num_words + 1, encoder_embedding_size,
                                                              initializer = tf.random_uniform_initializer(0, 1))

    # Fixed: the original passed the undefined name `encoder_embedded_input`.
    encoder_state = encoder_rnn_layer(encoder_embedded_inputs, rnn_size, num_layers, keep_prob, sequence_length)

    # Fixed: the helper defined in this notebook is `processed_targets`
    # (the original called the undefined `preprocess_targets`).
    preprocessed_targets = processed_targets(targets, questionswords2int, batch_size)

    # Decoder embedding matrix of shape [questions_num_words + 1, decoder_embedding_size],
    # initialised uniformly at random between 0 and 1.
    decoder_embeddings_matrix = tf.Variable(tf.random_uniform([questions_num_words + 1, decoder_embedding_size], 0, 1))

    decoder_embedded_input = tf.nn.embedding_lookup(decoder_embeddings_matrix, preprocessed_targets)

    # Fixed: the original passed the misspelled name `questionsword2int`.
    training_predictions, test_predictions = decoder_rnn_layer(decoder_embedded_input,
                                                               decoder_embeddings_matrix,
                                                               encoder_state,
                                                               questions_num_words,
                                                               sequence_length,
                                                               rnn_size,
                                                               num_layers,
                                                               questionswords2int,
                                                               keep_prob,
                                                               batch_size)

    return training_predictions, test_predictions