In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
# Special vocabulary symbols:
# PAD is used to pad a sequence to a fixed size
# GO is for the end of the encoding
# EOS is for the end of decoding
# UNK is for out of vocabulary words
_PAD, _GO, _EOS, _UNK = "_PAD", "_GO", "_EOS", "_UNK"
_START_VOCAB = [_PAD, _GO, _EOS, _UNK]
PAD_ID,GO_ID, EOS_ID, UNK_ID = range(4)
infer_batch_size=100
class Model(object):
    def __init__(self, hps, gpu_mode=True, reuse=False):
        self.gpu_mode=gpu_mode
        tf.reset_default_graph()
        
    def _create_embedding(self,x, vocab_size, embed_size, embed_matrix=None):

        # Creating an embedding matrix if one isn't given
        if embed_matrix is None:
            # This is a big matrix
            embed_matrix = tf.get_variable(
                name="embedding_matrix",
                shape=[vocab_size, embed_size],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-1.0, 1.0))

        # Perform the lookup of ids in x and perform the embedding to embed_size
        # [batch_size, max_time, embed_size]
        embed = tf.nn.embedding_lookup(embed_matrix, x)

        return embed, embed_matrix


    def _create_rnn_cell(self,n_neurons, n_layers, keep_prob):

        import tensorflow.contrib.rnn as rnn

        cell_fw = rnn.LayerNormBasicLSTMCell(
            num_units=n_neurons, dropout_keep_prob=keep_prob)
        # Build deeper recurrent self.net if using more than 1 layer
        if n_layers > 1:
            cells = [cell_fw]
            for layer_i in range(1, n_layers):
                with tf.variable_scope('{}'.format(layer_i)):
                    cell_fw = rnn.LayerNormBasicLSTMCell(
                        num_units=n_neurons, dropout_keep_prob=keep_prob)
                    cells.append(cell_fw)
            cell_fw = rnn.MultiRNNCell(cells)
        return cell_fw


    def super_linear(self,x,
                     output_size,
                     scope=None,
                     reuse=tf.AUTO_REUSE,
                     init_w='ortho',
                     weight_start=0.0,
                     use_bias=True,
                     bias_start=0.0,
                     input_size=None):
        """Performs linear operation. Uses ortho init defined earlier."""
        shape = x.get_shape().as_list()
        with tf.variable_scope(scope or 'linear'):
            if reuse is True:
                tf.get_variable_scope().reuse_variables()

            w_init = None  # uniform
            if input_size is None:
                x_size = shape[1]
            else:
                x_size = input_size
            if init_w == 'zeros':
                w_init = tf.constant_initializer(0.0)
            elif init_w == 'constant':
                w_init = tf.constant_initializer(weight_start)
            elif init_w == 'gaussian':
                w_init = tf.random_normal_initializer(stddev=weight_start)
            elif init_w == 'ortho':
                w_init = lstm_ortho_initializer(1.0)

            w = tf.get_variable(
            'super_linear_w', [x_size, output_size], tf.float32, initializer=w_init)
            if use_bias:
                b = tf.get_variable(
              'super_linear_b', [output_size],
              tf.float32,
              initializer=tf.constant_initializer(bias_start))
                return tf.matmul(x, w) + b
        return tf.matmul(x, w)
    

    def _create_encoder(self,embed, lengths, batch_size, n_enc_neurons, n_layers,
                        keep_prob,z_size=512):

        # Create the RNN Cells for encoder
        with tf.variable_scope('forward'):
            cell_fw = self._create_rnn_cell(n_enc_neurons, n_layers, keep_prob)

        # Create the internal multi-layer cell for the backward RNN.
        with tf.variable_scope('backward'):
            cell_bw = self._create_rnn_cell(n_enc_neurons, n_layers, keep_prob)

        # Now hookup the cells to the input
        # [batch_size, max_time, embed_size]
        (outputs, final_state) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=embed,
            sequence_length=lengths,
            time_major=False,
            dtype=tf.float32)

        last_state_fw, last_state_bw = final_state
        layer_index=0
        #if isinstance(last_state_fw[layer_index],tf.contrib.rnn.LSTMStateTuple):
         #   encoder_state_c = tf.concat(values=(last_state_fw[layer_index].c,last_state_bw[layer_index].c),axis=1,name="encoder_fw_state_c")
         #   encoder_state_h = tf.concat(values=(last_state_fw[layer_index].h,last_state_bw[layer_index].h),axis=1,name="encoder_fw_state_h")
        #last_h_fw = cell_fw.get_output_at(last_state_fw)
        #last_h_bw = cell_bw.get_output_at(last_state_bw)
        encoder_state_c = tf.concat(values=(last_state_fw.c,last_state_bw.c),axis=1,name="encoder_fw_state_c")
        encoder_state_h = tf.concat(values=(last_state_fw.h,last_state_bw.h),axis=1,name="encoder_fw_state_h")
    
        mu_h = self.super_linear(
            encoder_state_h,
            z_size,
            input_size=n_enc_neurons * 2,  # bi-dir, so x2
            scope='ENC_RNN_mu_h',
            init_w='gaussian',
            weight_start=0.001)
        presig_h = self.super_linear(
            encoder_state_h,
            z_size,
            input_size=n_enc_neurons * 2,  # bi-dir, so x2
            scope='ENC_RNN_sigma_h',
            init_w='gaussian',
            weight_start=0.001)
    
    
        sigma_h = tf.exp(presig_h / 2.0)  
        eps_h = tf.random_normal(
              (batch_size, z_size), 0.0, 1.0, dtype=tf.float32)
        batch_z_h = mu_h + tf.multiply(sigma_h, eps_h)
        sampled_z=tf.identity(batch_z_h,name="batch_z")
    
    
        kl_loss_z_h = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * sigma_h - tf.square(mu_h) - tf.exp(2.0 * sigma_h),
            1)
        
        print("KL Loss shape:")
        print(kl_loss_z_h.get_shape())
        
        print("mu :",mu_h.get_shape())
        
        enc=[]
        print("batch_size:",batch_size)
        zero_c=tf.zeros((batch_size, z_size), dtype=tf.float32)
        encoder_state = tf.contrib.rnn.LSTMStateTuple(c=zero_c, h=sampled_z)
        return outputs,encoder_state,final_state[0],kl_loss_z_h,mu_h,sigma_h


    def _create_decoder(self,cells,
                        batch_size,
                        encoder_outputs,
                        encoder_state,
                        encoder_lengths,
                        decoding_inputs,
                        decoding_lengths,
                        embed_matrix,
                        target_vocab_size,
                        scope,
                        max_sequence_size,use_attention=False):

        from tensorflow.python.layers.core import Dense

        # Output projection
        output_layer = Dense(target_vocab_size, name='output_projection')

        # Setup Attention
        if use_attention:
            attn_mech = tf.contrib.seq2seq.LuongAttention(
                cells.output_size, encoder_outputs, encoder_lengths, scale=True)
            cells = tf.contrib.seq2seq.AttentionWrapper(
                cell=cells,
                attention_mechanism=attn_mech,
                attention_layer_size=cells.output_size,
                alignment_history=False)
            initial_state = cells.zero_state(
                dtype=tf.float32, batch_size=batch_size)
            initial_state = initial_state.clone(cell_state=encoder_state)
        else:
            initial_state = encoder_state

        # Setup training a build decoder
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoding_inputs,
            sequence_length=decoding_lengths,
            time_major=False)
        #sampled_z=tf.identity(initial_state,name="sampled_z1")
        train_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=cells,
            helper=helper,
            initial_state=initial_state,
            output_layer=output_layer)
        train_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            train_decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=max_sequence_size)
        train_logits = tf.identity(train_outputs.rnn_output, name='train_logits')

        # Setup inference and build decoder
        scope.reuse_variables()
        start_tokens = tf.tile(tf.constant([GO_ID], dtype=tf.int32), [batch_size])
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=embed_matrix, start_tokens=start_tokens, end_token=EOS_ID)
        infer_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=cells,
            helper=helper,
            initial_state=initial_state,
            output_layer=output_layer)
        infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            infer_decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=max_sequence_size)
        infer_logits = tf.identity(infer_outputs.sample_id, name='infer_logits')

        return output_layer,train_logits, infer_logits


    def create_model(self,source_vocab_size=10000,
                     target_vocab_size=10000,
                     input_embed_size=512,
                     target_embed_size=512,
                     share_input_and_target_embedding=True,
                     n_neurons=512,
                     n_layers=1,
                     use_attention=True,
                     max_sequence_size=30):

        n_enc_neurons = n_neurons
        n_dec_neurons = n_neurons

        # First sentence (i.e. input, original language sentence before translation)
        # [batch_size, max_time]
        source = tf.placeholder(tf.int32, shape=(None, None), name='source')

        # User should also pass in the sequence lengths
        source_lengths = tf.placeholder(
            tf.int32, shape=(None,), name='source_lengths')

        # Second sentence (i.e. reply, translation, etc...)
        # [batch_size, max_time]
        target = tf.placeholder(tf.int32, shape=(None, None), name='target')

        # User should also pass in the sequence lengths
        target_lengths = tf.placeholder(
            tf.int32, shape=(None,), name='target_lengths')

        # Dropout
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Symbolic shapes
        batch_size, sequence_length = tf.unstack(tf.shape(source))

        # Get the input to the decoder by removing last element
        # and adding a 'go' symbol as first element
        with tf.variable_scope('target/slicing'):
            slice = tf.slice(target, [0, 0], [batch_size, -1])
            decoder_input = tf.concat([tf.fill([batch_size, 1], GO_ID), slice], 1)

        # Embed word ids to target embedding
        with tf.variable_scope('source/embedding'):
            source_embed, source_embed_matrix = self._create_embedding(
                x=source, vocab_size=source_vocab_size, embed_size=input_embed_size)

        # Embed word ids for target embedding
        with tf.variable_scope('target/embedding'):
            # Check if we need a new embedding matrix or not.  If we are for
            # instance translating to another language, then we'd need different
            # vocabularies for the input and outputs, and so new embeddings.
            # However if we are for instance building a chatbot with the same
            # language, then it doesn't make sense to have different embeddings and
            # we should share them.
            if (share_input_and_target_embedding and
                    source_vocab_size == target_vocab_size):
                target_input_embed, target_embed_matrix = self._create_embedding(
                    x=decoder_input,
                    vocab_size=target_vocab_size,
                    embed_size=target_embed_size,
                    embed_matrix=source_embed_matrix)
            elif source_vocab_size != target_vocab_size:
                raise ValueError(
                    'source_vocab_size must equal target_vocab_size if ' +
                    'sharing input and target embeddings')
            else:
                target_input_embed, target_embed_matrix = self._create_embedding(
                    x=target,
                    vocab_size=target_vocab_size,
                    embed_size=target_embed_size)

        # Build the encoder
        with tf.variable_scope('encoder'):
            #return outputs,mu_c,sigma_c,encoder_state,final_state[0],batch_z_c
            encoder_outputs,encoder_state,final_state,kl_loss_h,mu,sigma = self._create_encoder(
                embed=source_embed,
                lengths=source_lengths,
                batch_size=batch_size,
                n_enc_neurons=n_enc_neurons,
                n_layers=n_layers,
                keep_prob=keep_prob)

        # Build the decoder
        with tf.variable_scope('decoder') as scope:
            cell_fw = self._create_rnn_cell(n_dec_neurons, n_layers, keep_prob)
            output_layer,decoding_train_logits, decoding_infer_logits = self._create_decoder(
                cells=cell_fw,
                batch_size=batch_size,
                encoder_outputs=encoder_outputs[0],
                #encoder_state=encoder_state[0],
                encoder_state=encoder_state,
                encoder_lengths=source_lengths,
                decoding_inputs=target_input_embed,
                decoding_lengths=target_lengths,
                embed_matrix=target_embed_matrix,
                target_vocab_size=target_vocab_size,
                scope=scope,
                max_sequence_size=max_sequence_size)
            #cell_tensor=tf.identity(cell_fw,name="saved_cell")
        with tf.variable_scope('loss'):
            weights = tf.cast(tf.sequence_mask(target_lengths), tf.float32)
            kl_loss = tf.reduce_mean(kl_loss_h)
            loss = tf.contrib.seq2seq.sequence_loss(
                logits=tf.reshape(decoding_train_logits, [
                    batch_size, tf.reduce_max(target_lengths), target_vocab_size
                ]),
                targets=target,
                weights=weights) + tf.reduce_mean(kl_loss_h)

        return {
            'loss': loss,
            'source': source,
            'source_lengths': source_lengths,
            'target': target,
            'target_lengths': target_lengths,
            'keep_prob': keep_prob,
            'embedding_matrix':source_embed_matrix,
            'thought_vector': encoder_state,
            'decoder': decoding_infer_logits,
            'decoder_cells':cell_fw,
            'output_layer':output_layer,
            'prob_state':encoder_state,
            'final_state':final_state,
            'kl_loss':kl_loss,
            'mu':mu,
            'sigma':sigma
            
        }
    


    def batch_generator(self,sources,
                        targets,
                        source_lengths,
                        target_lengths,
                        batch_size=10):

        idxs = np.random.permutation(np.arange(len(sources)))
        n_batches = len(idxs) // batch_size
        for batch_i in range(n_batches):
            this_idxs = idxs[batch_i * batch_size:(batch_i + 1) * batch_size]
            this_sources, this_targets = sources[this_idxs, :], targets[
                this_idxs, :]
            this_source_lengths, this_target_lengths = source_lengths[
                this_idxs], target_lengths[this_idxs]
            yield (this_sources[:, :np.max(this_source_lengths)],
                   this_targets[:, :np.max(this_target_lengths)],
                   this_source_lengths, this_target_lengths)


    def preprocess(self,text,vocab=None, min_count=5, min_length=3, max_length=30):

        sentences = [el for s in text for el in nltk.sent_tokenize(s)]
    
        # We'll first tokenize each sentence into words to get a sense of
        # how long each sentence is:
        words = [[word.lower() for word in nltk.word_tokenize(s)]
                 for s in sentences]

        # Then see how long each sentence is:
        lengths = np.array([len(s) for s in words])

        good_idxs = np.where((lengths >= min_length) & (lengths < max_length))[0]
        dataset = [words[idx] for idx in good_idxs]
        fdist = nltk.FreqDist([word for sentence in dataset for word in sentence])

        vocab_counts = [el for el in fdist.most_common() if el[1] > min_count]
        if vocab is None:
        # First sort the vocabulary
            vocab = [v[0] for v in vocab_counts]
            vocab.sort()

            # Now add the special symbols:
            vocab = _START_VOCAB + vocab

            # Then create the word to id mapping
            vocab = {k: v for v, k in enumerate(vocab)}

            with open('vocab.pkl', 'wb') as fp:
                pickle.dump(vocab, fp)

        unked = self.word2id(dataset, vocab)
        return unked, vocab


    def word2id(self,words, vocab):

        unked = []
        for s in words:
            this_sentence = [vocab.get(w, UNK_ID) for w in s]
            unked.append(this_sentence)
        return unked


    def id2word(self,ids, vocab):

        words = []
        id2words = {v: k for k, v in vocab.items()}
        for s in ids:
            this_sentence = [id2words.get(w) for w in s]
            words.append(this_sentence)
        return words

    def decode(self,sess,tokens, lengths):
        decoding = sess.run(
            self.net['decoder'],
            feed_dict={
                self.net['keep_prob']: 1.0,
                self.net['source']: tokens,
                self.net['source_lengths']: lengths
            })
        print('input:', " ".join(self.id2word([tokens[0]], self.vocab)[0]))
        print('output:', " ".join(self.id2word([decoding[0]], self.vocab)[0]))
        print('ouput codes:',decoding[0])
            
    def train(self,text,
              max_sequence_size=20,
              use_attention=False,
              min_count=0,
              min_length=5,
              n_epochs=1000,
              batch_size=50,z_size=512):

        # Preprocess it to word IDs including UNKs for out of vocabulary words
        self.unked, self.vocab = self.preprocess(
            text,
            min_count=min_count,
            min_length=min_length,
            max_length=max_sequence_size - 1)

        # Get the vocabulary size
        vocab_size = len(self.vocab)
        print(vocab_size)
        # Create input output pairs formed by neighboring sentences of dialog
        #sources_list, targets_list = unked[:-1], unked[1:]
        sources_list = self.unked
        targets_list = self.unked
        # Store the final lengths
        source_lengths = np.zeros((len(sources_list)), dtype=np.int32)
        target_lengths = np.zeros((len(targets_list)), dtype=np.int32)
        sources = np.ones(
            (len(sources_list), max_sequence_size), dtype=np.int32) * PAD_ID
        targets = np.ones(
            (len(targets_list), max_sequence_size), dtype=np.int32) * PAD_ID

        for i, (source_i, target_i) in enumerate(zip(sources_list, targets_list)):
            el = source_i
            source_lengths[i] = len(el)
            sources[i, :len(el)] = el

            el = target_i + [EOS_ID]
            target_lengths[i] = len(el)
            targets[i, :len(el)] = el

        sess = tf.Session()

        self.net = self.create_model(
            max_sequence_size=max_sequence_size,
            use_attention=use_attention,
            source_vocab_size=vocab_size,
            target_vocab_size=vocab_size)

        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        opt = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.net['loss'])
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        
        #output_layer = Dense(vocab_size, name='output_projection_prediction')
        infer_cells_ = self.net['decoder_cells']
        infer_output_layer_ = self.net['output_layer']
        start_tokens_ = tf.tile(tf.constant([GO_ID], dtype=tf.int32), [infer_batch_size])
        zero_c=tf.zeros((infer_batch_size, z_size), dtype=tf.float32)
        z=tf.placeholder(tf.float32, shape=(infer_batch_size, z_size), name='z_sampled')
        infer_encoder_state_ = tf.contrib.rnn.LSTMStateTuple(c=zero_c, h=z)
        infer_helper_ = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=self.net['embedding_matrix'], start_tokens=start_tokens_, end_token=EOS_ID)
        infer_decoder_ = tf.contrib.seq2seq.BasicDecoder(
                cell=infer_cells_,
                helper=infer_helper_,
                initial_state=infer_encoder_state_,
                output_layer=infer_output_layer_)
        infer_outputs_, _, _ = tf.contrib.seq2seq.dynamic_decode(
                infer_decoder_,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=max_sequence_size)
        
        infer_logits_ = tf.identity(infer_outputs_.sample_id, name='infer_logits_pred')
        sess.run(init_op)
        saver = tf.train.Saver()

        

        current_learning_rate = 0.01
        epoch_i=0
        test_sent=[]
        test_sent_len=0
        try:
            while not coord.should_stop():
            #for epoch_i in range(n_epochs):
                total = 0
                for it_i, (this_sources, this_targets, this_source_lengths, this_target_lengths) \
                    in enumerate(self.batch_generator(
                        sources, targets, source_lengths, target_lengths, batch_size=batch_size)):
                    if it_i % 1000 == 0:
                        current_learning_rate = max(0.0001,
                                                    current_learning_rate * 0.99)
                        if(epoch_i==1):
                            test_sent=this_sources[0:1]
                            test_sent_len=this_source_lengths[0:1]
                        self.decode(sess,this_sources[0:1], this_source_lengths[0:1])
                    l,kl,_ = sess.run(
                        [self.net['loss'],self.net['kl_loss'], opt],
                        feed_dict={
                            learning_rate: current_learning_rate,
                            self.net['keep_prob']: 0.8,
                            self.net['source']: this_sources,
                            self.net['target']: this_targets,
                            self.net['source_lengths']: this_source_lengths,
                            self.net['target_lengths']: this_target_lengths
                        })

                    prob_state,final_state,mu_,sigma_=sess.run([self.net['prob_state'],self.net['final_state'],self.net['mu'],self.net['sigma']],feed_dict={
                            learning_rate: current_learning_rate,
                            self.net['keep_prob']: 0.8,
                            self.net['source']: this_sources,
                            self.net['target']: this_targets,
                            self.net['source_lengths']: this_source_lengths,
                            self.net['target_lengths']: this_target_lengths
                        })

                    total = total + l
                    print('{}: {}'.format(it_i, total / (it_i + 1)), end='\r')
                    print("KL Loss:",kl)
                # End of epoch, save
                    print('epoch {}: {}'.format(epoch_i, total / it_i))
                epoch_i=epoch_i+1
                if epoch_i==10:
                    coord.request_stop()
                                
            
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # One of the threads has issued an exception.  So let's tell all the
            # threads to shutdown.
            coord.request_stop()

        # Wait until all threads have finished.
        coord.join(threads)
        saver.save(sess, './vae-rnn.ckpt', global_step=epoch_i)
        def slerp(p0, p1, t):
            omega = np.arccos(np.dot(p0 / np.linalg.norm(p0), p1 / np.linalg.norm(p1)))
            so = np.sin(omega)
            return np.sin((1.0 - t) * omega) / so * p0 + np.sin(t * omega) / so * p1
    
        def get_thought_vector(txt,txt_len):
            thgt_vect=sess.run(self.net['thought_vector'],feed_dict={self.net['keep_prob']: 1,self.net['source']: txt,
                        self.net['source_lengths']: txt_len})
            return thgt_vect
    

        print("mu shape:",mu_)
        print("sigma:",sigma_)
        z_1 = np.random.randn(infer_batch_size,512)
        eps_h = np.random.randn(infer_batch_size, z_size)
        #z_1 = mu_[0:infer_batch_size,] + np.multiply(sigma_[0:infer_batch_size,], eps_h)
        predictions=sess.run(infer_logits_,feed_dict={z:z_1,self.net['keep_prob']:1.0})
       
    
        print("Test Sents:")
        for i in range(infer_batch_size):
            print('output:', " ".join(self.id2word([predictions[i]], self.vocab)[0]))
            #print("Z_1:",z_1[i,:])
        N = 10
        for t in np.linspace(0, 1, N):
            print("Interpolation step:", int(t*10))
            #z_i=slerp(z_0[0][0], z_1[0][0], t)
            z_i=z_0[1][0] + (1-t)*(z_1[1][0]-z_0[1][0])
            #z_h=z_i[np.newaxis,]
            zero_c=np.zeros((1, 512), dtype=np.float32)

            #new_enc_state = np.concatenate([zero_c,z_i[np.newaxis,]],0)
            new_enc_state =tf.contrib.rnn.LSTMStateTuple(c=zero_c,h=z_i[np.newaxis,])
            #print(id2word(test_sent, vocab)[0])
            decod=sess.run(self.net['decoder'],feed_dict={self.net['keep_prob']: 1,self.net['thought_vector']:new_enc_state,self.net['source']: test_sent,
                        self.net['source_lengths']: test_sent_len})
            print('decoder output:', self.id2word(decod, self.vocab)[0])
   
        sess.close()

In [3]:
import nltk
import pickle
import codecs
def read_lines(fname):
    ret=[]
    indx=-1
    for line in codecs.open(fname, encoding="utf-8"):
        sent = line
        indx=indx+1
        ret.append(sent)

    return ret



md=Model(hps=None)
txt=read_lines('C:\\Users\\Vinod\projects\\sentence-autoencoder\\China_hack.txt')
md.train(txt)

336
KL Loss shape:
(?,)
mu : (?, 512)
batch_size: Tensor("unstack:0", shape=(), dtype=int32)
input: we expect reprisals from china as well as an onslaught of criticism . _PAD _PAD _PAD _PAD _PAD
output: specifically his likely his 20report_chinese_capabilitiesforcomputer_networkoperationsandcyberespionage.pdf 20report_chinese_capabilitiesforcomputer_networkoperationsandcyberespionage.pdf configured publishing 141 conservatively published shows shows other other net confirmation net china while
ouput codes: [279 153 187 153  23  23 101 245  15 104 244 271 271 226 226 202 102 202
  87 323]
KL Loss: 1123.7645375
epoch 0: inf
input: apt1 focuses on compromising organizations across a broad range of industries in english-speaking countries . _PAD _PAD _PAD
output: » » » » » , _EOS
ouput codes: [330 330 330 330 330   7   2]




KL Loss: 508.9717725
epoch 1: inf
input: we expect reprisals from china as well as an onslaught of criticism . _PAD _PAD _PAD _PAD _PAD
output: we , the , _EOS
ouput codes: [319   7 291   7   2]
KL Loss: 410.5163219
epoch 2: inf
input: the decision to publish a significant part of our intelligence about unit 61398 was a painstaking one _PAD
output: » » » » » » » » » » » » » » » » » » » »
ouput codes: [330 330 330 330 330 330 330 330 330 330 330 330 330 330 330 330 330 330
 330 330]
KL Loss: 184.32288844
epoch 3: inf
input: 3 our conclusions are based exclusively on unclassified , open source information derived from mandiant observations _PAD _PAD
output: » » » » » » » » » » » » » » » » » » » »
ouput codes: [330 330 330 330 330 330 330 330 330 330 330 330 330 330 330 330 330 330
 330 330]
KL Loss: 248.1564603
epoch 4: inf
input: none of the information in this report involves access to or confirmation by classified intelligence . _PAD _PAD
output: » » » » » » » » » » » » » » » » » » » 

NameError: name 'z_0' is not defined

In [71]:
logits

<tf.Tensor 'decoder/infer_logits:0' shape=(?, ?) dtype=int32>

In [36]:
batch_size=10
embed_size=512
z_size=512
n_dec_neurons = 512
n_layers = 1
keep_prob = 1.0
max_sequence_size = 20
from tensorflow.python.layers.core import Dense
with open('vocab.pkl', 'rb') as fp:
    vocab=pickle.load(fp)
    
source = ["the apt uses"]
source_length=[1]
z_1 = np.random.randn(batch_size,512)

checkpoint = "./vae-rnn.ckpt-10"
model=Model(hps=None)
unked,_=model.preprocess(source,vocab)
vocab_size=len(vocab)
loaded_graph = tf.Graph()

with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(checkpoint + '.meta')
    loader.restore(sess, checkpoint)
    input_z = loaded_graph.get_tensor_by_name('encoder/batch_z:0')
    logits = loaded_graph.get_tensor_by_name('decoder/infer_logits:0')
    source = loaded_graph.get_tensor_by_name('source:0')
    source_lengths = loaded_graph.get_tensor_by_name('source_lengths:0')
    keep_prob=loaded_graph.get_tensor_by_name('keep_prob:0')
    embedding_matrix=loaded_graph.get_tensor_by_name('source/embedding/embedding_matrix:0')
    embed=embedding_matrix.eval()
    
#working_graph=tf.Graph()
#with tf.Session(graph=working_graph) as sess:
    output_layer = Dense(vocab_size, name='output_projection')
    cells = model._create_rnn_cell(n_dec_neurons, n_layers, keep_prob)
    start_tokens = tf.tile(tf.constant([GO_ID], dtype=tf.int32), [batch_size])
    zero_c=tf.zeros((batch_size, z_size), dtype=tf.float32)
    z=tf.placeholder(tf.float32, shape=(batch_size, z_size), name='z_sampled')
    encoder_state = tf.contrib.rnn.LSTMStateTuple(c=zero_c, h=z)
    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=embedding_matrix, start_tokens=start_tokens, end_token=EOS_ID)
    infer_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=cells,
            helper=helper,
            initial_state=encoder_state,
            output_layer=output_layer)
    infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            infer_decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=max_sequence_size)
    infer_logits=sess.run(infer_outputs,feed_dict={z:z_1,keep_prob:1.0})
    #infer_logits=sess.run(logits,{input_z: z_1,source:[unked]*batch_size,source_lengths:[source_length]*batch_size,keep_prob:1.0} )
    #infer_logits=sess.run(logits,{input_z: z_1,embedding_matrix:embed})

INFO:tensorflow:Restoring parameters from ./vae-rnn.ckpt-10


FailedPreconditionError: Attempting to use uninitialized value decoder/layer_norm_basic_lstm_cell/state/beta
	 [[Node: decoder/layer_norm_basic_lstm_cell/state/beta/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](decoder/layer_norm_basic_lstm_cell/state/beta)]]
	 [[Node: decoder/while/BasicDecoderStep/ArgMax/_289 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_249_decoder/while/BasicDecoderStep/ArgMax", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](^_cloopdecoder/while/Select_2/_247)]]

Caused by op 'decoder/layer_norm_basic_lstm_cell/state/beta/read', defined at:
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-36-919c7c1436e9>", line 53, in <module>
    maximum_iterations=max_sequence_size)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 309, in dynamic_decode
    swap_memory=swap_memory)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2934, in while_loop
    result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2720, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2662, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 254, in body
    decoder_finished) = decoder.step(time, inputs, state)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py", line 138, in step
    cell_outputs, cell_state = self._cell(inputs, state)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 188, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\layers\base.py", line 652, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\contrib\rnn\python\ops\rnn_cell.py", line 1414, in call
    new_c = self._norm(new_c, "state", dtype=dtype)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\contrib\rnn\python\ops\rnn_cell.py", line 1378, in _norm
    vs.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1262, in get_variable
    constraint=constraint)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1097, in get_variable
    constraint=constraint)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 427, in get_variable
    return custom_getter(**custom_getter_kwargs)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 191, in _rnn_get_variable
    variable = getter(*args, **kwargs)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 404, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 806, in _get_single_variable
    constraint=constraint)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\variables.py", line 229, in __init__
    constraint=constraint)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\variables.py", line 373, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\array_ops.py", line 127, in identity
    return gen_array_ops.identity(input, name=name)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 2728, in identity
    "Identity", input=input, name=name)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 3160, in create_op
    op_def=op_def)
  File "C:\Users\Vinod\Anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value decoder/layer_norm_basic_lstm_cell/state/beta
	 [[Node: decoder/layer_norm_basic_lstm_cell/state/beta/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](decoder/layer_norm_basic_lstm_cell/state/beta)]]
	 [[Node: decoder/while/BasicDecoderStep/ArgMax/_289 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_249_decoder/while/BasicDecoderStep/ArgMax", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](^_cloopdecoder/while/Select_2/_247)]]


In [18]:
source = ["the apt uses"]
unked,_=model.preprocess(source,vocab)
([unked]*10)[0]


ValueError: Cannot evaluate tensor using `eval()`: No default session is registered. Use `with sess.as_default()` or pass an explicit session to `eval(session=sess)`

In [20]:
embed

array([[ 0.4826095 , -0.29743624,  0.9528408 , ..., -0.27740264,
         0.9505074 , -0.3581655 ],
       [-0.47100595, -0.1888142 , -0.37042865, ..., -0.2040554 ,
        -0.8872597 , -0.5737    ],
       [-0.7269716 ,  0.15058541,  0.18357754, ...,  0.30755734,
        -0.55266976,  0.16425014],
       ...,
       [ 0.32169425, -0.27367222, -0.71774787, ..., -0.8827288 ,
        -0.46514425, -0.09313238],
       [ 0.9734705 ,  0.5444694 , -0.56080586, ...,  0.21127783,
        -0.02388286,  0.5769358 ],
       [-0.1442375 , -0.02128563,  0.8911779 , ...,  0.2750093 ,
        -0.68218243,  0.9351503 ]], dtype=float32)

In [91]:
[n.name for n in loaded_graph.as_graph_def().node if "ENC_RNN_mu_h" in n.name]

['encoder/ENC_RNN_mu_h/super_linear_w/Initializer/random_normal/shape',
 'encoder/ENC_RNN_mu_h/super_linear_w/Initializer/random_normal/mean',
 'encoder/ENC_RNN_mu_h/super_linear_w/Initializer/random_normal/stddev',
 'encoder/ENC_RNN_mu_h/super_linear_w/Initializer/random_normal/RandomStandardNormal',
 'encoder/ENC_RNN_mu_h/super_linear_w/Initializer/random_normal/mul',
 'encoder/ENC_RNN_mu_h/super_linear_w/Initializer/random_normal',
 'encoder/ENC_RNN_mu_h/super_linear_w',
 'encoder/ENC_RNN_mu_h/super_linear_w/Assign',
 'encoder/ENC_RNN_mu_h/super_linear_w/read',
 'encoder/ENC_RNN_mu_h/super_linear_b/Initializer/Const',
 'encoder/ENC_RNN_mu_h/super_linear_b',
 'encoder/ENC_RNN_mu_h/super_linear_b/Assign',
 'encoder/ENC_RNN_mu_h/super_linear_b/read',
 'encoder/ENC_RNN_mu_h/MatMul',
 'encoder/ENC_RNN_mu_h/add',
 'gradients/encoder/ENC_RNN_mu_h/add_grad/Shape',
 'gradients/encoder/ENC_RNN_mu_h/add_grad/Shape_1',
 'gradients/encoder/ENC_RNN_mu_h/add_grad/BroadcastGradientArgs',
 'gradient

In [39]:
def parents(op):
    return set(input.op for input in op.inputs)

def children(op):
    return set(op for out in op.outputs for op in out.consumers())

def get_graph():
    """Creates dictionary {node: {child1, child2, ..},..} for current
    TensorFlow graph. Result is compatible with networkx/toposort"""

    ops = loaded_graph.get_operations()
    return {op: children(op) for op in ops}


def print_tf_graph(graph):
    """Prints tensorflow graph in dictionary form."""
    for node in graph:
        for child in graph[node]:
            print("%s -> %s" % (node.name, child.name))

In [81]:
tf.summary.FileWriter("logs", loaded_graph).close()