In [2]:
#importing requried libraries 导入模型实现所需要的包
import numpy as np
import tensorflow as tf
import time
from collections import namedtuple

In [3]:
#reading the Anna Karenina novel text file 读取Anna Karenina 小说的文本
with open('Anna_Karenina.txt', 'r') as f:
    textlines=f.read()
#Building the vocan and encoding the characters as integers 将字符转换为整数，存入数据集中
language_vocab = set(textlines)     #文本中所有字符的集合
vocab_to_integer = {char: j for j, char in enumerate(language_vocab)}   #字符列表进行字典索引
integer_to_vocab = dict(enumerate(language_vocab))
encoded_vocab = np.array([vocab_to_integer[char] for char in textlines], dtype=np.int32)   #对文本中的所有字符进行数字编码

In [4]:
textlines[:200]  #Anna Karenina文本的前200个字符

"Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverything was in confusion in the Oblonskys' house. The wife had\ndiscovered that the husband was carrying on"

In [5]:
encoded_vocab[:200]   #对前200个字符进行编码

array([38, 62, 26, 27, 77,  8, 45,  3, 36, 16, 16, 16, 24, 26, 27, 27, 71,
        3,  9, 26, 11, 57,  1, 57,  8, 19,  3, 26, 45,  8,  3, 26,  1,  1,
        3, 26,  1, 57, 55,  8, 66,  3,  8, 25,  8, 45, 71,  3, 46, 64, 62,
       26, 27, 27, 71,  3,  9, 26, 11, 57,  1, 71,  3, 57, 19,  3, 46, 64,
       62, 26, 27, 27, 71,  3, 57, 64,  3, 57, 77, 19,  3, 82,  4, 64, 16,
        4, 26, 71, 58, 16, 16, 12, 25,  8, 45, 71, 77, 62, 57, 64,  6,  3,
        4, 26, 19,  3, 57, 64,  3, 23, 82, 64,  9, 46, 19, 57, 82, 64,  3,
       57, 64,  3, 77, 62,  8,  3, 59, 76,  1, 82, 64, 19, 55, 71, 19,  0,
        3, 62, 82, 46, 19,  8, 58,  3, 72, 62,  8,  3,  4, 57,  9,  8,  3,
       62, 26, 47, 16, 47, 57, 19, 23, 82, 25,  8, 45,  8, 47,  3, 77, 62,
       26, 77,  3, 77, 62,  8,  3, 62, 46, 19, 76, 26, 64, 47,  3,  4, 26,
       19,  3, 23, 26, 45, 45, 71, 57, 64,  6,  3, 82, 64])

In [6]:
len(language_vocab) #Anna Karenina文本中的所有字符类型，包括83种不同的类型

83

In [7]:
#将数据生成小批次进行训练
def generate_character_batches(data, num_seq, num_steps):
    '''Create a function that returns batches of size
       num_seq x num_steps from data.
    '''
    # Get the number of characters per batch and number of batches  计算batch的大小，batch的数量
    num_char_per_batch = num_seq * num_steps
    num_batches = len(data) // num_char_per_batch

    # Keep only enough characters to make full batches  只保留完成的batch，也就是说对不能整除的部分舍弃
    data = data[:num_batches * num_char_per_batch]

    # Reshape the array into n_seqs rows  将这个数组进行重塑，行数为the number of sequences，列数自动生成
    data = data.reshape((num_seq, -1))

    for i in range(0, data.shape[1], num_steps):
        # The input variables  输入变量
        input_x = data[:, i:i + num_steps]

        # The output variables which are shifted by one  输出变量偏移一个位置
        output_y = np.zeros_like(input_x)

        output_y[:, :-1], output_y[:, -1] = input_x[:, 1:], input_x[:, 0]
        yield input_x, output_y

In [8]:
#生成一个the number of sequences为15，the number of steps为50的批来演示这个函数
generated_batches = generate_character_batches(encoded_vocab, 15, 50)
input_x, output_y = next(generated_batches)
print('input\n', input_x[:10, :10])
print('\ntarget\n', output_y[:10, :10])

input
 [[38 62 26 27 77  8 45  3 36 16]
 [ 3  8  1 57  6 57 76  1  8  3]
 [82  3  1 57  6 62 77 10 62  8]
 [ 3 62 26 47  3 76  8  8 64  3]
 [ 3 82 64  8  3 55 64  8  4 16]
 [71  8 26 45 19  3  1 26 19 77]
 [ 3 74  8  3  6 82  3 57 64 77]
 [82  4  3 77 82  3 11 26 55  8]
 [76  8  9 82 45  8  3  2  8 64]
 [19 26  6  8  3  1 57 55  8  3]]

target
 [[62 26 27 77  8 45  3 36 16 16]
 [ 8  1 57  6 57 76  1  8  3  6]
 [ 3  1 57  6 62 77 10 62  8 26]
 [62 26 47  3 76  8  8 64  3 76]
 [82 64  8  3 55 64  8  4 16  4]
 [ 8 26 45 19  3  1 26 19 77  3]
 [74  8  3  6 82  3 57 64 77 82]
 [ 4  3 77 82  3 11 26 55  8 61]
 [ 8  9 82 45  8  3  2  8 64 77]
 [26  6  8  3  1 57 55  8  3 77]]


In [9]:
#创建输入层
def build_model_inputs(batch_size, num_steps):
    # Declare placeholders for the input and output variables  定义输入变量和输出变量的占位符
    inputs_x = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets_y = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')

    # define the keep_probability for the dropout layer  为了防止过拟合，在dropout层定义keep probability这个参数，用来控制dropout的保留结点数
    keep_probability = tf.placeholder(tf.float32, name='keep_prob')

    return inputs_x, targets_y, keep_probability

In [10]:
#创建LSTM
def build_lstm_cell(size, num_layers, batch_size, keep_probability):
    ### Building the LSTM Cell using the tensorflow function 运用TensorFlow中的函数来构建基本的LSTM单元
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(size)

    # Adding dropout to the layer to prevent overfitting  为了防止过拟合，在层中添加dropout层
    drop_layer = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_probability)

    # Add muliple cells together and stack them up to oprovide a level of more understanding  通过堆叠多个LSTM单元，构建多层LSTM
    stakced_cell = tf.contrib.rnn.MultiRNNCell([drop_layer] * num_layers)
    initial_cell_state = lstm_cell.zero_state(batch_size, tf.float32)

    return lstm_cell, initial_cell_state

In [11]:
#创建输出层
def build_model_output(output, input_size, output_size):
    # Reshaping output of the model to become a bunch of rows, where each row correspond for each step in the seq  重塑输出的尺寸
    sequence_output = tf.concat(output, axis=1)
    reshaped_output = tf.reshape(sequence_output, [-1, input_size])

    # Connect the RNN outputs to a softmax layer 将输出连接到softmax层
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((input_size, output_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(output_size))

    # the output is a set of rows of LSTM cell outputs, so the logits will be a set 
    # of rows of logit outputs, one for each step and sequence  计算logits
    logits = tf.matmul(reshaped_output, softmax_w) + softmax_b

    # Use softmax to get the probabilities for predicted characters  用softmax函数得到预测字符的概率
    model_out = tf.nn.softmax(logits, name='predictions')

    return model_out, logits

In [12]:
#训练损失
def model_loss(logits, targets, lstm_size, num_classes):
    # convert the targets to one-hot encoded and reshape them to match the logits, one row per batch_size per step
    #因为softmax层输出的概率分布，所以对目标值进行one-hot编码
    output_y_one_hot = tf.one_hot(targets, num_classes)
    output_y_reshaped = tf.reshape(output_y_one_hot, logits.get_shape())

    # Use the cross entropy loss  用softmax交叉熵来计算损失
    model_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=output_y_reshaped)
    model_loss = tf.reduce_mean(model_loss)
    return model_loss

In [13]:
#优化
def build_model_optimizer(model_loss, learning_rate, grad_clip):
    # define optimizer for training, using gradient clipping to avoid the exploding of the gradients  
    #定义一个优化器，为了避免梯度爆炸，引入梯度裁剪，设置一个阈值，当梯度值超过这个特定的阈值时，将它重置为阈值大小。
    trainable_variables = tf.trainable_variables()
    gradients, _ = tf.clip_by_global_norm(tf.gradients(model_loss, trainable_variables), grad_clip)

    # Use Adam Optimizer  整个学习过程使用Adam/优化器
    train_operation = tf.train.AdamOptimizer(learning_rate)
    model_optimizer = train_operation.apply_gradients(zip(gradients, trainable_variables))

    return model_optimizer

In [14]:
#构建网络，将所有构建的模块组合起来
class CharLSTM:
    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=128, num_layers=2, learning_rate=0.001,
                 grad_clip=5, sampling=False):

        # When we're using this network for generating text by sampling, we'll be providing the network with
        # one character at a time, so providing an option for it. 我们使用这个网络来生成文本
        if sampling == True:
            batch_size, num_steps = 1, 1
        else:
            batch_size, num_steps = batch_size, num_steps

        tf.reset_default_graph()

        # Build the model inputs placeholders of the input and target variables  模型的输入和目标变量
        self.inputs, self.targets, self.keep_prob = build_model_inputs(batch_size, num_steps)

        # Building the LSTM cell  建立LSTM单元
        lstm_cell, self.initial_state = build_lstm_cell(lstm_size, num_layers, batch_size, self.keep_prob)

        ### Run the data through the LSTM layers 通过LSTM层运行这个数据
        # one_hot encode the input  将输入变量进行one-hot编码
        input_x_one_hot = tf.one_hot(self.inputs, num_classes)

        # Runing each sequence step through the LSTM architecture and finally collectting the outputs  通过LSTM的结构来运行，并得到输出值
        outputs, state = tf.nn.dynamic_rnn(lstm_cell, input_x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Get softmax predictions and logits  得到softmax的预测和logits
        self.prediction, self.logits = build_model_output(outputs, lstm_size, num_classes)

        # Loss and optimizer (with gradient clipping) 训练损失和引入梯度裁剪的优化
        self.loss = model_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_model_optimizer(self.loss, learning_rate, grad_clip)

In [16]:
#设置超参数值
batch_size = 100        # Sequences per batch  
num_steps = 100         # Number of sequence steps per batch  每个batch的序列步长
lstm_size = 512         # Size of hidden layers in LSTMs  
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001   # Learning rate  学习率
keep_probability = 0.5         # Dropout keep probability dropout层中保留结点的比例
print('Starting the training process...')
epochs = 5 

Starting the training process...


In [17]:
# Save a checkpoint N iterations每N轮进行一次变量保存
save_every_n = 100

In [19]:
LSTM_model = CharLSTM(len(language_vocab), batch_size=batch_size, num_steps=num_steps,
                      lstm_size=lstm_size, num_layers=num_layers,
                      learning_rate=learning_rate)


In [20]:
saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Use the line below to load a checkpoint and resume training
    # saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network  训练网络
        new_state = sess.run(LSTM_model.initial_state)
        loss = 0
        for x, y in generate_character_batches(encoded_vocab, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {LSTM_model.inputs: x,
                    LSTM_model.targets: y,
                    LSTM_model.keep_prob: keep_probability,
                    LSTM_model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([LSTM_model.loss,
                                                 LSTM_model.final_state,
                                                 LSTM_model.optimizer],
                                                feed_dict=feed)

            end = time.time()
            print('Epoch number: {}/{}... '.format(e + 1, epochs),
                  'Step: {}... '.format(counter),
                  'loss: {:.4f}... '.format(batch_loss),
                  '{:.3f} sec/batch'.format((end - start)))

            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))


Epoch number: 1/5...  Step: 1...  loss: 4.4197...  7.640 sec/batch
Epoch number: 1/5...  Step: 2...  loss: 4.3664...  7.148 sec/batch
Epoch number: 1/5...  Step: 3...  loss: 4.2514...  6.937 sec/batch
Epoch number: 1/5...  Step: 4...  loss: 3.8124...  6.712 sec/batch
Epoch number: 1/5...  Step: 5...  loss: 3.4003...  6.733 sec/batch
Epoch number: 1/5...  Step: 6...  loss: 3.3277...  6.739 sec/batch
Epoch number: 1/5...  Step: 7...  loss: 3.2580...  6.426 sec/batch
Epoch number: 1/5...  Step: 8...  loss: 3.1936...  6.739 sec/batch
Epoch number: 1/5...  Step: 9...  loss: 3.1433...  6.618 sec/batch
Epoch number: 1/5...  Step: 10...  loss: 3.1419...  6.756 sec/batch
Epoch number: 1/5...  Step: 11...  loss: 3.1315...  6.734 sec/batch
Epoch number: 1/5...  Step: 12...  loss: 3.1202...  6.393 sec/batch
Epoch number: 1/5...  Step: 13...  loss: 3.1152...  6.377 sec/batch
Epoch number: 1/5...  Step: 14...  loss: 3.1369...  6.394 sec/batch
Epoch number: 1/5...  Step: 15...  loss: 3.1236...  6.381

Epoch number: 1/5...  Step: 122...  loss: 2.5608...  6.661 sec/batch
Epoch number: 1/5...  Step: 123...  loss: 2.5617...  7.222 sec/batch
Epoch number: 1/5...  Step: 124...  loss: 2.5829...  6.807 sec/batch
Epoch number: 1/5...  Step: 125...  loss: 2.6361...  6.625 sec/batch
Epoch number: 1/5...  Step: 126...  loss: 2.5401...  6.742 sec/batch
Epoch number: 1/5...  Step: 127...  loss: 2.5783...  6.622 sec/batch
Epoch number: 1/5...  Step: 128...  loss: 2.5695...  6.930 sec/batch
Epoch number: 1/5...  Step: 129...  loss: 2.5429...  6.690 sec/batch
Epoch number: 1/5...  Step: 130...  loss: 2.5458...  7.829 sec/batch
Epoch number: 1/5...  Step: 131...  loss: 2.5373...  7.001 sec/batch
Epoch number: 1/5...  Step: 132...  loss: 2.5274...  7.079 sec/batch
Epoch number: 1/5...  Step: 133...  loss: 2.5341...  6.808 sec/batch
Epoch number: 1/5...  Step: 134...  loss: 2.5284...  6.567 sec/batch
Epoch number: 1/5...  Step: 135...  loss: 2.4970...  6.666 sec/batch
Epoch number: 1/5...  Step: 136...

Epoch number: 2/5...  Step: 241...  loss: 2.2363...  7.566 sec/batch
Epoch number: 2/5...  Step: 242...  loss: 2.2335...  7.270 sec/batch
Epoch number: 2/5...  Step: 243...  loss: 2.2369...  6.848 sec/batch
Epoch number: 2/5...  Step: 244...  loss: 2.1976...  6.738 sec/batch
Epoch number: 2/5...  Step: 245...  loss: 2.2597...  6.801 sec/batch
Epoch number: 2/5...  Step: 246...  loss: 2.2400...  7.153 sec/batch
Epoch number: 2/5...  Step: 247...  loss: 2.2401...  7.105 sec/batch
Epoch number: 2/5...  Step: 248...  loss: 2.2894...  6.883 sec/batch
Epoch number: 2/5...  Step: 249...  loss: 2.2330...  7.411 sec/batch
Epoch number: 2/5...  Step: 250...  loss: 2.2809...  6.810 sec/batch
Epoch number: 2/5...  Step: 251...  loss: 2.2288...  6.747 sec/batch
Epoch number: 2/5...  Step: 252...  loss: 2.2369...  6.822 sec/batch
Epoch number: 2/5...  Step: 253...  loss: 2.2326...  6.890 sec/batch
Epoch number: 2/5...  Step: 254...  loss: 2.2507...  7.049 sec/batch
Epoch number: 2/5...  Step: 255...

Epoch number: 2/5...  Step: 360...  loss: 2.1109...  6.793 sec/batch
Epoch number: 2/5...  Step: 361...  loss: 2.1157...  6.765 sec/batch
Epoch number: 2/5...  Step: 362...  loss: 2.1097...  6.777 sec/batch
Epoch number: 2/5...  Step: 363...  loss: 2.1243...  6.919 sec/batch
Epoch number: 2/5...  Step: 364...  loss: 2.1183...  6.876 sec/batch
Epoch number: 2/5...  Step: 365...  loss: 2.1295...  6.977 sec/batch
Epoch number: 2/5...  Step: 366...  loss: 2.1319...  6.911 sec/batch
Epoch number: 2/5...  Step: 367...  loss: 2.1114...  6.816 sec/batch
Epoch number: 2/5...  Step: 368...  loss: 2.0973...  6.828 sec/batch
Epoch number: 2/5...  Step: 369...  loss: 2.1165...  6.725 sec/batch
Epoch number: 2/5...  Step: 370...  loss: 2.1276...  6.805 sec/batch
Epoch number: 2/5...  Step: 371...  loss: 2.1457...  6.898 sec/batch
Epoch number: 2/5...  Step: 372...  loss: 2.1558...  6.994 sec/batch
Epoch number: 2/5...  Step: 373...  loss: 2.1422...  6.919 sec/batch
Epoch number: 2/5...  Step: 374...

Epoch number: 3/5...  Step: 479...  loss: 1.9809...  6.819 sec/batch
Epoch number: 3/5...  Step: 480...  loss: 2.0021...  7.004 sec/batch
Epoch number: 3/5...  Step: 481...  loss: 1.9675...  6.989 sec/batch
Epoch number: 3/5...  Step: 482...  loss: 1.9913...  7.248 sec/batch
Epoch number: 3/5...  Step: 483...  loss: 2.0026...  6.977 sec/batch
Epoch number: 3/5...  Step: 484...  loss: 1.9892...  6.838 sec/batch
Epoch number: 3/5...  Step: 485...  loss: 1.9781...  6.834 sec/batch
Epoch number: 3/5...  Step: 486...  loss: 2.0174...  6.852 sec/batch
Epoch number: 3/5...  Step: 487...  loss: 1.9833...  7.295 sec/batch
Epoch number: 3/5...  Step: 488...  loss: 1.9994...  7.139 sec/batch
Epoch number: 3/5...  Step: 489...  loss: 1.9716...  6.989 sec/batch
Epoch number: 3/5...  Step: 490...  loss: 1.9807...  6.787 sec/batch
Epoch number: 3/5...  Step: 491...  loss: 1.9836...  7.035 sec/batch
Epoch number: 3/5...  Step: 492...  loss: 1.9905...  7.128 sec/batch
Epoch number: 3/5...  Step: 493...

Epoch number: 4/5...  Step: 598...  loss: 1.9204...  7.566 sec/batch
Epoch number: 4/5...  Step: 599...  loss: 1.9190...  7.594 sec/batch
Epoch number: 4/5...  Step: 600...  loss: 1.8985...  8.960 sec/batch
Epoch number: 4/5...  Step: 601...  loss: 1.9297...  7.311 sec/batch
Epoch number: 4/5...  Step: 602...  loss: 1.9211...  6.695 sec/batch
Epoch number: 4/5...  Step: 603...  loss: 1.9541...  6.872 sec/batch
Epoch number: 4/5...  Step: 604...  loss: 1.9198...  7.483 sec/batch
Epoch number: 4/5...  Step: 605...  loss: 1.9011...  7.677 sec/batch
Epoch number: 4/5...  Step: 606...  loss: 1.8946...  6.650 sec/batch
Epoch number: 4/5...  Step: 607...  loss: 1.9267...  6.564 sec/batch
Epoch number: 4/5...  Step: 608...  loss: 1.9472...  6.625 sec/batch
Epoch number: 4/5...  Step: 609...  loss: 1.9135...  6.663 sec/batch
Epoch number: 4/5...  Step: 610...  loss: 1.8966...  6.582 sec/batch
Epoch number: 4/5...  Step: 611...  loss: 1.9169...  6.585 sec/batch
Epoch number: 4/5...  Step: 612...

Epoch number: 4/5...  Step: 717...  loss: 1.8272...  6.600 sec/batch
Epoch number: 4/5...  Step: 718...  loss: 1.8702...  6.720 sec/batch
Epoch number: 4/5...  Step: 719...  loss: 1.8466...  6.690 sec/batch
Epoch number: 4/5...  Step: 720...  loss: 1.8102...  6.693 sec/batch
Epoch number: 4/5...  Step: 721...  loss: 1.8639...  6.737 sec/batch
Epoch number: 4/5...  Step: 722...  loss: 1.8614...  6.906 sec/batch
Epoch number: 4/5...  Step: 723...  loss: 1.8475...  6.699 sec/batch
Epoch number: 4/5...  Step: 724...  loss: 1.8538...  6.659 sec/batch
Epoch number: 4/5...  Step: 725...  loss: 1.8153...  7.428 sec/batch
Epoch number: 4/5...  Step: 726...  loss: 1.8102...  6.658 sec/batch
Epoch number: 4/5...  Step: 727...  loss: 1.8596...  6.608 sec/batch
Epoch number: 4/5...  Step: 728...  loss: 1.8576...  6.602 sec/batch
Epoch number: 4/5...  Step: 729...  loss: 1.8521...  6.563 sec/batch
Epoch number: 4/5...  Step: 730...  loss: 1.8505...  7.383 sec/batch
Epoch number: 4/5...  Step: 731...

Epoch number: 5/5...  Step: 836...  loss: 1.7581...  6.742 sec/batch
Epoch number: 5/5...  Step: 837...  loss: 1.7918...  6.778 sec/batch
Epoch number: 5/5...  Step: 838...  loss: 1.7276...  6.673 sec/batch
Epoch number: 5/5...  Step: 839...  loss: 1.7767...  6.768 sec/batch
Epoch number: 5/5...  Step: 840...  loss: 1.7590...  6.705 sec/batch
Epoch number: 5/5...  Step: 841...  loss: 1.7686...  6.861 sec/batch
Epoch number: 5/5...  Step: 842...  loss: 1.8202...  7.449 sec/batch
Epoch number: 5/5...  Step: 843...  loss: 1.7453...  6.766 sec/batch
Epoch number: 5/5...  Step: 844...  loss: 1.8302...  6.901 sec/batch
Epoch number: 5/5...  Step: 845...  loss: 1.7795...  6.577 sec/batch
Epoch number: 5/5...  Step: 846...  loss: 1.7739...  6.641 sec/batch
Epoch number: 5/5...  Step: 847...  loss: 1.7686...  6.604 sec/batch
Epoch number: 5/5...  Step: 848...  loss: 1.7903...  6.678 sec/batch
Epoch number: 5/5...  Step: 849...  loss: 1.7909...  6.605 sec/batch
Epoch number: 5/5...  Step: 850...

Epoch number: 5/5...  Step: 955...  loss: 1.7464...  6.831 sec/batch
Epoch number: 5/5...  Step: 956...  loss: 1.7386...  6.845 sec/batch
Epoch number: 5/5...  Step: 957...  loss: 1.7323...  6.662 sec/batch
Epoch number: 5/5...  Step: 958...  loss: 1.7249...  6.675 sec/batch
Epoch number: 5/5...  Step: 959...  loss: 1.7473...  6.753 sec/batch
Epoch number: 5/5...  Step: 960...  loss: 1.7893...  6.778 sec/batch
Epoch number: 5/5...  Step: 961...  loss: 1.7229...  6.867 sec/batch
Epoch number: 5/5...  Step: 962...  loss: 1.7273...  6.781 sec/batch
Epoch number: 5/5...  Step: 963...  loss: 1.7156...  6.775 sec/batch
Epoch number: 5/5...  Step: 964...  loss: 1.7159...  6.697 sec/batch
Epoch number: 5/5...  Step: 965...  loss: 1.7508...  6.746 sec/batch
Epoch number: 5/5...  Step: 966...  loss: 1.7287...  6.941 sec/batch
Epoch number: 5/5...  Step: 967...  loss: 1.7409...  6.923 sec/batch
Epoch number: 5/5...  Step: 968...  loss: 1.7251...  6.853 sec/batch
Epoch number: 5/5...  Step: 969...

In [22]:
print('Training is done...')

Training is done...


In [23]:
#Defining helper functions for sampling from the network  定义函数从网络中抽样
def pick_top_n_characters(preds, vocab_size, top_n_chars=4):  #从预测结果中选取前4个最有可能的字符
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n_chars]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]  #随机选取一个字符
    return c

In [24]:
#生成新文本
def sample_from_LSTM_output(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    LSTM_model = CharLSTM(len(language_vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(LSTM_model.initial_state)
        for char in prime:
            x = np.zeros((1, 1))
            x[0, 0] = vocab_to_integer[char]
            feed = {LSTM_model.inputs: x,
                    LSTM_model.keep_prob: 1.,
                    LSTM_model.initial_state: new_state}
            preds, new_state = sess.run([LSTM_model.prediction, LSTM_model.final_state],
                                        feed_dict=feed)

        c = pick_top_n_characters(preds, len(language_vocab))
        samples.append(integer_to_vocab[c])

        for i in range(n_samples):
            x[0, 0] = c
            feed = {LSTM_model.inputs: x,
                    LSTM_model.keep_prob: 1.,
                    LSTM_model.initial_state: new_state}
            preds, new_state = sess.run([LSTM_model.prediction, LSTM_model.final_state],
                                        feed_dict=feed)

            c = pick_top_n_characters(preds, len(language_vocab))
            samples.append(integer_to_vocab[c])

    return ''.join(samples)

In [27]:
print('Loading latest checkpoint..')

Loading latest checkpoint..


In [28]:
#选用最终的训练参数进行文本生成
checkpoint = tf.train.latest_checkpoint('checkpoints')
print('Sampling text frm the trained model....')
sampled_text = sample_from_LSTM_output(checkpoint, 2000, lstm_size, len(language_vocab), prime="Far")
print(sampled_text)

Sampling text frm the trained model....
INFO:tensorflow:Restoring parameters from checkpoints\i990_l512.ckpt
Fary.

A had st pring that he west it that in a shise. He's dind a the seare," she tadky to her hims lantence was the counten the steation, shisenting, the sare with stope her hand to heard the race, and thit sen therred whe had been some then thought that tho gand hores,
sha dand to
ser him, be to say that her
himself cound to anst to the sand. She had bo gead hem and her homen welk of serthire while he sand ang to hor samp he with anghen the planct of her and the reations war hes lenghtion was ther heade she was to bo himent over her tanked to her ho donce the
shantsors of a to the ran of
the sempers the she whot he was ant the reat thit wal so cherd and the clook to bet his sood and to
dearity, when he sould, as he sace the began ta thore was sond when see so herr and the righines, and that thinkieg time her and to
the ringer and the rood. She had to be that he was shend was 