In [1]:
from torchtext import data
from torchtext.data import BucketIterator

def read_data(input_file, max_length):
    with open(input_file, encoding="utf8") as f:
        poetries = []
        poetry = []
        for line in f:
            contends = line.strip()
            if len(poetry) + len(contends) <= max_length:
                if contends:
                    poetry.extend(contends)
                else:
                    poetries.append(poetry)
                    poetry = []
            else:
                poetries.append(poetry)
                poetry = list(contends)
        if poetry:
            poetries.append(poetry)
        return poetries


class PoetryDataset(data.Dataset):

    def __init__(self, text_field, datafile, max_length, **kwargs):
        fields = [("text", text_field)]
        datas = read_data(datafile, max_length)
        examples = []
        for text in datas:
            examples.append(data.Example.fromlist([text], fields))
        super(PoetryDataset, self).__init__(examples, fields, **kwargs)


def load_iters(eos_token="[EOS]", batch_size=32, device="cpu", data_path='data', max_length=128):
    TEXT = data.Field(eos_token=eos_token, batch_first=True, include_lengths=True)
    datas = PoetryDataset(TEXT, "./poetryFromTang.txt", max_length)
    train_data, dev_data, test_data = datas.split([0.8, 0.1, 0.1])

    TEXT.build_vocab(train_data)

    train_iter, dev_iter, test_iter = BucketIterator.splits(
        (train_data, dev_data, test_data),
        batch_sizes=(batch_size, batch_size, batch_size),
        device=device,
        sort_key=lambda x: len(x.text),
        sort_within_batch=True,
        repeat=False,
        shuffle=True
    )
    return train_iter, dev_iter, test_iter, TEXT

In [8]:
# -*- coding:utf8 -*-
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm, trange
from tensorboardX import SummaryWriter
import math

torch.manual_seed(1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

BATCH_SIZE = 64
HIDDEN_DIM = 512
LAYER_NUM = 1
EPOCHS = 200
DROPOUT_RATE = 0.5
LEARNING_RATE = 0.01
MOMENTUM = 0.9
CLIP = 5
DECAY_RATE = 0.05  # learning rate decay rate
EOS_TOKEN = "[EOS]"
DATA_PATH = 'data'
EMBEDDING_SIZE = 200
TEMPERATURE = 0.8  # Higher temperature means more diversity.
MAX_LEN = 64


def train(train_iter, dev_iter, loss_func, optimizer, epochs, clip):
    for epoch in trange(epochs):
        model.train()
        total_loss = 0
        total_words = 0
        for i, batch in enumerate(tqdm(train_iter)):
            text, lens = batch.text
            if epoch == 0 and i == 0:
                tqdm.write(' '.join([TEXT.vocab.itos[i] for i in text[0]]))
                tqdm.write(' '.join([str(i.item()) for i in text[0]]))
            inputs = text[:, :-1]
            targets = text[:, 1:]
            init_hidden = model.lstm.init_hidden(inputs.size(0))
            logits, _ = model(inputs, lens - 1, init_hidden)  # [EOS] is included in length.
            loss = loss_func(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))

            model.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            total_loss += loss.item()
            total_words += lens.sum().item()
        tqdm.write("Epoch: %d, Train perplexity: %d" % (epoch + 1, math.exp(total_loss / total_words)))
        writer.add_scalar('Train_Loss', total_loss, epoch)
        eval(dev_iter, True, epoch)

        lr = LEARNING_RATE / (1 + DECAY_RATE * (epoch + 1))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr


def eval(data_iter, is_dev=False, epoch=None):
    model.eval()
    with torch.no_grad():
        total_words = 0
        total_loss = 0
        for i, batch in enumerate(data_iter):
            text, lens = batch.text
            inputs = text[:, :-1]
            targets = text[:, 1:]
            model.zero_grad()
            init_hidden = model.lstm.init_hidden(inputs.size(0))
            logits, _ = model(inputs, lens - 1, init_hidden)  # [EOS] is included in length.
            loss = loss_func(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))

            total_loss += loss.item()
            total_words += lens.sum().item()
    if epoch is not None:
        tqdm.write(
            "Epoch: %d, %s perplexity %.3f" % (
                epoch + 1, "Dev" if is_dev else "Test", math.exp(total_loss / total_words)))
        writer.add_scalar('Dev_Loss', total_loss, epoch)
    else:
        tqdm.write(
            "%s perplexity %.3f" % ("Dev" if is_dev else "Test", math.exp(total_loss / total_words)))


def generate(eos_idx, word, temperature=0.8):
    model.eval()
    with torch.no_grad():
        if word in TEXT.vocab.stoi:
            idx = TEXT.vocab.stoi[word]
            inputs = torch.tensor([idx])
        else:
            print("%s is not in vocabulary, choose by random." % word)
            prob = torch.ones(len(TEXT.vocab.stoi))
            inputs = torch.multinomial(prob, 1)
            idx = inputs[0].item()

        inputs = inputs.unsqueeze(1).to(device)  # shape [1, 1]
        lens = torch.tensor([1]).to(device)
        hidden = tuple([h.to(device) for h in model.lstm.init_hidden(1)])
        poetry = [TEXT.vocab.itos[idx]]

        while idx != eos_idx:
            logits, hidden = model(inputs, lens, hidden)  # logits: (1, 1, vocab_size)
            word_weights = logits.squeeze().div(temperature).exp().cpu()
            idx = torch.multinomial(word_weights, 1)[0].item()
            inputs.fill_(idx)
            poetry.append(TEXT.vocab.itos[idx])
        print("".join(poetry[:-1]))

In [9]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size=128, dropout_rate=0.5, layer_num=1):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.layer_num = layer_num
        if layer_num == 1:
            self.lstm = nn.LSTM(input_size, hidden_size, layer_num, batch_first=True)
        else:
            self.lstm = nn.LSTM(input_size, hidden_size, layer_num, dropout=dropout_rate, batch_first=True)

        self.init_weights()

    def init_weights(self):
        for p in self.lstm.parameters():
            if p.dim() > 1:
                nn.init.xavier_normal_(p)
            else:
                p.data.zero_()

    def init_hidden(self, batch_size):
        weight = next(self.parameters())
        return (weight.new_zeros(self.layer_num, batch_size, self.hidden_size),
                weight.new_zeros(self.layer_num, batch_size, self.hidden_size))

    def forward(self, x, lens, hidden):
        '''
        :param x: (batch, seq_len, input_size)
        :param lens: (batch, ), in descending order
        :param hidden: tuple(h,c), each has shape (num_layer, batch, hidden_size)
        :return: output: (batch, seq_len, hidden_size)
                 tuple(h,c): each has shape (num_layer, batch, hidden_size)
        '''
        packed_x = nn.utils.rnn.pack_padded_sequence(x, lens, batch_first=True)
        packed_output, (h, c) = self.lstm(packed_x, hidden)
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        return output, (h, c)


class LSTM_LM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size=128, dropout_rate=0.2, layer_num=1, max_seq_len=128):
        super(LSTM_LM, self).__init__()
        self.hidden_size = hidden_size
        self.layer_num = layer_num
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = LSTM(embed_size, hidden_size, dropout_rate, layer_num)
        self.project = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(dropout_rate)
        self.init_weights()

    def init_weights(self):
        nn.init.xavier_normal_(self.embed.weight)
        nn.init.xavier_normal_(self.project.weight)

    def forward(self, x, lens, hidden):
        '''
        :param x: (batch, seq_len, input_size)
        :param lens: (batch, ), in descending order
        :param hidden: tuple(h,c), each has shape (num_layer, batch, hidden_size)
        :return: output: (batch, seq_len, hidden_size)
                 tuple(h,c): each has shape (num_layer, batch, hidden_size)
        '''
        embed = self.embed(x)
        hidden, (h, c) = self.lstm(self.dropout(embed), lens, hidden)  # (batch, seq_len, hidden_size)
        out = self.project(self.dropout(hidden))  # (batch, seq_len, vocab_size)
        return out, (h, c)


In [10]:
train_iter, dev_iter, test_iter, TEXT = load_iters(EOS_TOKEN, BATCH_SIZE, device, DATA_PATH, MAX_LEN)
pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
eos_idx = TEXT.vocab.stoi[EOS_TOKEN]
model = LSTM_LM(len(TEXT.vocab), EMBEDDING_SIZE, HIDDEN_DIM, DROPOUT_RATE, LAYER_NUM).to(device)

optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
loss_func = torch.nn.CrossEntropyLoss(ignore_index=pad_idx, reduction="sum")
writer = SummaryWriter("logs")
train(train_iter, dev_iter, loss_func, optimizer, EPOCHS, CLIP)
eval(test_iter, is_dev=False)
try:
    while True:
        word = input("Input the first word or press Ctrl-C to exit: ")
        generate(eos_idx, word.strip(), TEMPERATURE)
except:
    pass

  0%|          | 0/200 [00:00<?, ?it/s]
                                       
                                     

道 傍 过 者 问 行 人 ， 行 人 但 云 点 行 频 。 或 从 十 五 北 防 河 ， 便 至 四 十 西 营 田 。 去 时 里 正 与 裹 头 ， 归 来 头 白 还 戍 边 。 边 亭 流 血 成 海 水 ， 武 皇 开 边 意 未 已 。 [EOS]


  0%|          | 0/200 [00:00<?, ?it/s]
                                       
                                     

61 1046 206 180 165 93 6 3 93 6 243 13 1232 93 2272 4 1146 138 104 137 173 2247 158 3 491 146 188 104 94 1316 466 4 47 17 57 555 59 2133 76 3 72 9 76 26 207 1145 205 4 205 212 73 298 117 67 25 3 335 179 66 205 97 28 71 4 2


  0%|          | 0/200 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 20%|██        | 1/5 [00:00<00:03,  1.23it/s]
                                       3it/s]

Epoch: 1, Train perplexity: 1944


                                       

Epoch: 1, Dev perplexity 1819.759


  0%|          | 1/200 [00:00<03:11,  1.04it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 33.42it/s]
                                               

Epoch: 2, Train perplexity: 1722


                                               

Epoch: 2, Dev perplexity 1531.292


  1%|          | 2/200 [00:01<02:23,  1.38it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                               

Epoch: 3, Train perplexity: 1368


                                               

Epoch: 3, Dev perplexity 1155.715


  2%|▏         | 3/200 [00:01<01:49,  1.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.19it/s]
                                               

Epoch: 4, Train perplexity: 1021


                                               

Epoch: 4, Dev perplexity 1053.890


  2%|▏         | 4/200 [00:01<01:25,  2.30it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                               

Epoch: 5, Train perplexity: 1278


                                               

Epoch: 5, Dev perplexity 1150.963


  2%|▎         | 5/200 [00:01<01:07,  2.88it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.19it/s]
                                               

Epoch: 6, Train perplexity: 982


                                               

Epoch: 6, Dev perplexity 881.378


  3%|▎         | 6/200 [00:01<00:56,  3.45it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.71it/s]
                                               

Epoch: 7, Train perplexity: 900


                                               

Epoch: 7, Dev perplexity 878.489


  4%|▎         | 7/200 [00:01<00:47,  4.04it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                               

Epoch: 8, Train perplexity: 857


                                               

Epoch: 8, Dev perplexity 811.584


  4%|▍         | 8/200 [00:02<00:41,  4.62it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.84it/s]
                                               

Epoch: 9, Train perplexity: 814


                                               

Epoch: 9, Dev perplexity 775.811


  4%|▍         | 9/200 [00:02<00:37,  5.10it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.79it/s]
                                               

Epoch: 10, Train perplexity: 764


                                               

Epoch: 10, Dev perplexity 731.924


  5%|▌         | 10/200 [00:02<00:34,  5.47it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 11, Train perplexity: 716


                                                

Epoch: 11, Dev perplexity 688.240


  6%|▌         | 11/200 [00:02<00:32,  5.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 12, Train perplexity: 675


                                                

Epoch: 12, Dev perplexity 659.153


  6%|▌         | 12/200 [00:02<00:30,  6.07it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 13, Train perplexity: 648


                                                

Epoch: 13, Dev perplexity 642.201


  6%|▋         | 13/200 [00:02<00:29,  6.29it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 14, Train perplexity: 634


                                                

Epoch: 14, Dev perplexity 627.942


  7%|▋         | 14/200 [00:02<00:28,  6.47it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.71it/s]
                                                

Epoch: 15, Train perplexity: 620


                                                

Epoch: 15, Dev perplexity 618.987


  8%|▊         | 15/200 [00:03<00:28,  6.52it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 16, Train perplexity: 612


                                                

Epoch: 16, Dev perplexity 613.976


  8%|▊         | 16/200 [00:03<00:27,  6.66it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.94it/s]
                                                

Epoch: 17, Train perplexity: 604


                                                

Epoch: 17, Dev perplexity 611.983


  8%|▊         | 17/200 [00:03<00:27,  6.63it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.94it/s]
                                                

Epoch: 18, Train perplexity: 599


                                                

Epoch: 18, Dev perplexity 610.611


  9%|▉         | 18/200 [00:03<00:27,  6.63it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.84it/s]
                                                

Epoch: 19, Train perplexity: 595


                                                

Epoch: 19, Dev perplexity 609.862


 10%|▉         | 19/200 [00:03<00:27,  6.56it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 20, Train perplexity: 594


                                                

Epoch: 20, Dev perplexity 608.771


 10%|█         | 20/200 [00:03<00:27,  6.66it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 21, Train perplexity: 590


                                                

Epoch: 21, Dev perplexity 608.308


 10%|█         | 21/200 [00:03<00:26,  6.68it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 22, Train perplexity: 587


                                                

Epoch: 22, Dev perplexity 608.206


 11%|█         | 22/200 [00:04<00:26,  6.71it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 23, Train perplexity: 586


                                                

Epoch: 23, Dev perplexity 608.090


 12%|█▏        | 23/200 [00:04<00:26,  6.60it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.79it/s]
                                                

Epoch: 24, Train perplexity: 584


                                                

Epoch: 24, Dev perplexity 608.120


 12%|█▏        | 24/200 [00:04<00:26,  6.57it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 25, Train perplexity: 585


                                                

Epoch: 25, Dev perplexity 607.853


 12%|█▎        | 25/200 [00:04<00:26,  6.71it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 26, Train perplexity: 583


                                                

Epoch: 26, Dev perplexity 608.220


 13%|█▎        | 26/200 [00:04<00:25,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 27, Train perplexity: 582


                                                

Epoch: 27, Dev perplexity 608.434


 14%|█▎        | 27/200 [00:04<00:25,  6.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.20it/s]
                                                

Epoch: 28, Train perplexity: 580


                                                

Epoch: 28, Dev perplexity 608.461


 14%|█▍        | 28/200 [00:05<00:25,  6.77it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 29, Train perplexity: 580


                                                

Epoch: 29, Dev perplexity 608.881


 14%|█▍        | 29/200 [00:05<00:25,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 30, Train perplexity: 579


                                                

Epoch: 30, Dev perplexity 608.734


 15%|█▌        | 30/200 [00:05<00:24,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 31, Train perplexity: 580


                                                

Epoch: 31, Dev perplexity 608.738


 16%|█▌        | 31/200 [00:05<00:24,  6.86it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 32, Train perplexity: 577


                                                

Epoch: 32, Dev perplexity 608.874


 16%|█▌        | 32/200 [00:05<00:24,  6.88it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 33, Train perplexity: 579


                                                

Epoch: 33, Dev perplexity 608.879


 16%|█▋        | 33/200 [00:05<00:24,  6.89it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 34, Train perplexity: 578


                                                

Epoch: 34, Dev perplexity 609.038


 17%|█▋        | 34/200 [00:05<00:23,  6.93it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.78it/s]
                                                

Epoch: 35, Train perplexity: 577


                                                

Epoch: 35, Dev perplexity 608.602


 18%|█▊        | 35/200 [00:06<00:24,  6.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 36, Train perplexity: 576


                                                

Epoch: 36, Dev perplexity 608.481


 18%|█▊        | 36/200 [00:06<00:23,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 37, Train perplexity: 575


                                                

Epoch: 37, Dev perplexity 608.844


 18%|█▊        | 37/200 [00:06<00:23,  6.89it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.71it/s]
                                                

Epoch: 38, Train perplexity: 575


                                                

Epoch: 38, Dev perplexity 609.000


 19%|█▉        | 38/200 [00:06<00:23,  6.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 39, Train perplexity: 575


                                                

Epoch: 39, Dev perplexity 609.000


 20%|█▉        | 39/200 [00:06<00:23,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 40, Train perplexity: 574


                                                

Epoch: 40, Dev perplexity 609.103


 20%|██        | 40/200 [00:06<00:23,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 41, Train perplexity: 574


                                                

Epoch: 41, Dev perplexity 609.188


 20%|██        | 41/200 [00:06<00:23,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.48it/s]
                                                

Epoch: 42, Train perplexity: 575


                                                

Epoch: 42, Dev perplexity 608.995


 21%|██        | 42/200 [00:07<00:23,  6.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 43, Train perplexity: 575


                                                

Epoch: 43, Dev perplexity 609.318


 22%|██▏       | 43/200 [00:07<00:23,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 44, Train perplexity: 572


                                                

Epoch: 44, Dev perplexity 609.307


 22%|██▏       | 44/200 [00:07<00:22,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 45, Train perplexity: 572


                                                

Epoch: 45, Dev perplexity 609.195


 22%|██▎       | 45/200 [00:07<00:22,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 46, Train perplexity: 571


                                                

Epoch: 46, Dev perplexity 609.153


 23%|██▎       | 46/200 [00:07<00:22,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 47, Train perplexity: 570


                                                

Epoch: 47, Dev perplexity 608.919


 24%|██▎       | 47/200 [00:07<00:22,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.84it/s]
                                                

Epoch: 48, Train perplexity: 571


                                                

Epoch: 48, Dev perplexity 609.042


 24%|██▍       | 48/200 [00:07<00:22,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.32it/s]
                                                

Epoch: 49, Train perplexity: 570


                                                

Epoch: 49, Dev perplexity 608.782


 24%|██▍       | 49/200 [00:08<00:22,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.32it/s]
                                                

Epoch: 50, Train perplexity: 569


                                                

Epoch: 50, Dev perplexity 608.714


 25%|██▌       | 50/200 [00:08<00:22,  6.62it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 51, Train perplexity: 570


                                                

Epoch: 51, Dev perplexity 608.675


 26%|██▌       | 51/200 [00:08<00:22,  6.61it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.56it/s]
                                                

Epoch: 52, Train perplexity: 570


                                                

Epoch: 52, Dev perplexity 608.289


 26%|██▌       | 52/200 [00:08<00:22,  6.66it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 53, Train perplexity: 567


                                                

Epoch: 53, Dev perplexity 608.255


 26%|██▋       | 53/200 [00:08<00:21,  6.70it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 54, Train perplexity: 569


                                                

Epoch: 54, Dev perplexity 608.057


 27%|██▋       | 54/200 [00:08<00:21,  6.69it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.32it/s]
                                                

Epoch: 55, Train perplexity: 566


                                                

Epoch: 55, Dev perplexity 607.822


 28%|██▊       | 55/200 [00:08<00:21,  6.62it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.84it/s]
                                                

Epoch: 56, Train perplexity: 568


                                                

Epoch: 56, Dev perplexity 607.851


 28%|██▊       | 56/200 [00:09<00:21,  6.60it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 57, Train perplexity: 568


                                                

Epoch: 57, Dev perplexity 607.406


 28%|██▊       | 57/200 [00:09<00:21,  6.61it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 58, Train perplexity: 565


                                                

Epoch: 58, Dev perplexity 606.858


 29%|██▉       | 58/200 [00:09<00:21,  6.67it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 59, Train perplexity: 565


                                                

Epoch: 59, Dev perplexity 606.678


 30%|██▉       | 59/200 [00:09<00:21,  6.69it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.46it/s]
                                                

Epoch: 60, Train perplexity: 565


                                                

Epoch: 60, Dev perplexity 606.181


 30%|███       | 60/200 [00:09<00:21,  6.61it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 61, Train perplexity: 564


                                                

Epoch: 61, Dev perplexity 605.873


 30%|███       | 61/200 [00:09<00:21,  6.62it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 62, Train perplexity: 563


                                                

Epoch: 62, Dev perplexity 605.315


 31%|███       | 62/200 [00:10<00:20,  6.60it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 63, Train perplexity: 564


                                                

Epoch: 63, Dev perplexity 605.000


 32%|███▏      | 63/200 [00:10<00:20,  6.69it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.71it/s]
                                                

Epoch: 64, Train perplexity: 562


                                                

Epoch: 64, Dev perplexity 604.306


 32%|███▏      | 64/200 [00:10<00:20,  6.66it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 65, Train perplexity: 563


                                                

Epoch: 65, Dev perplexity 603.626


 32%|███▎      | 65/200 [00:10<00:20,  6.74it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 66, Train perplexity: 561


                                                

Epoch: 66, Dev perplexity 602.396


 33%|███▎      | 66/200 [00:10<00:19,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 67, Train perplexity: 560


                                                

Epoch: 67, Dev perplexity 601.461


 34%|███▎      | 67/200 [00:10<00:19,  6.77it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 68, Train perplexity: 558


                                                

Epoch: 68, Dev perplexity 600.041


 34%|███▍      | 68/200 [00:10<00:19,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 69, Train perplexity: 557


                                                

Epoch: 69, Dev perplexity 599.071


 34%|███▍      | 69/200 [00:11<00:19,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 70, Train perplexity: 557


                                                

Epoch: 70, Dev perplexity 597.242


 35%|███▌      | 70/200 [00:11<00:19,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 71, Train perplexity: 554


                                                

Epoch: 71, Dev perplexity 595.723


 36%|███▌      | 71/200 [00:11<00:18,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 72, Train perplexity: 553


                                                

Epoch: 72, Dev perplexity 594.252


 36%|███▌      | 72/200 [00:11<00:18,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.56it/s]
                                                

Epoch: 73, Train perplexity: 551


                                                

Epoch: 73, Dev perplexity 592.983


 36%|███▋      | 73/200 [00:11<00:18,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 74, Train perplexity: 550


                                                

Epoch: 74, Dev perplexity 591.814


 37%|███▋      | 74/200 [00:11<00:18,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 75, Train perplexity: 549


                                                

Epoch: 75, Dev perplexity 588.923


 38%|███▊      | 75/200 [00:11<00:18,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 76, Train perplexity: 547


                                                

Epoch: 76, Dev perplexity 586.153


 38%|███▊      | 76/200 [00:12<00:17,  6.90it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 77, Train perplexity: 544


                                                

Epoch: 77, Dev perplexity 584.118


 38%|███▊      | 77/200 [00:12<00:17,  6.93it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 78, Train perplexity: 539


                                                

Epoch: 78, Dev perplexity 580.878


 39%|███▉      | 78/200 [00:12<00:17,  6.91it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.14it/s]
                                                

Epoch: 79, Train perplexity: 538


                                                

Epoch: 79, Dev perplexity 577.694


 40%|███▉      | 79/200 [00:12<00:17,  6.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 80, Train perplexity: 536


                                                

Epoch: 80, Dev perplexity 573.814


 40%|████      | 80/200 [00:12<00:17,  6.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 81, Train perplexity: 530


                                                

Epoch: 81, Dev perplexity 570.481


 40%|████      | 81/200 [00:12<00:17,  6.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 82, Train perplexity: 529


                                                

Epoch: 82, Dev perplexity 569.579


 41%|████      | 82/200 [00:12<00:17,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.56it/s]
                                                

Epoch: 83, Train perplexity: 526


                                                

Epoch: 83, Dev perplexity 564.102


 42%|████▏     | 83/200 [00:13<00:17,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 84, Train perplexity: 528


                                                

Epoch: 84, Dev perplexity 561.236


 42%|████▏     | 84/200 [00:13<00:16,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.48it/s]
                                                

Epoch: 85, Train perplexity: 523


                                                

Epoch: 85, Dev perplexity 567.065


 42%|████▎     | 85/200 [00:13<00:16,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 86, Train perplexity: 523


                                                

Epoch: 86, Dev perplexity 559.508


 43%|████▎     | 86/200 [00:13<00:16,  6.88it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.45it/s]
                                                

Epoch: 87, Train perplexity: 519


                                                

Epoch: 87, Dev perplexity 566.227


 44%|████▎     | 87/200 [00:13<00:16,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 88, Train perplexity: 523


                                                

Epoch: 88, Dev perplexity 554.684


 44%|████▍     | 88/200 [00:13<00:16,  6.86it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.47it/s]
                                                

Epoch: 89, Train perplexity: 520


                                                

Epoch: 89, Dev perplexity 554.660


 44%|████▍     | 89/200 [00:14<00:16,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 90, Train perplexity: 515


                                                

Epoch: 90, Dev perplexity 550.707


 45%|████▌     | 90/200 [00:14<00:15,  6.90it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 91, Train perplexity: 514


                                                

Epoch: 91, Dev perplexity 551.150


 46%|████▌     | 91/200 [00:14<00:15,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.82it/s]
                                                

Epoch: 92, Train perplexity: 512


                                                

Epoch: 92, Dev perplexity 548.748


 46%|████▌     | 92/200 [00:14<00:15,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 93, Train perplexity: 509


                                                

Epoch: 93, Dev perplexity 545.978


 46%|████▋     | 93/200 [00:14<00:15,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.56it/s]
                                                

Epoch: 94, Train perplexity: 507


                                                

Epoch: 94, Dev perplexity 540.876


 47%|████▋     | 94/200 [00:14<00:15,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 95, Train perplexity: 508


                                                

Epoch: 95, Dev perplexity 538.036


 48%|████▊     | 95/200 [00:14<00:15,  6.93it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 96, Train perplexity: 506


                                                

Epoch: 96, Dev perplexity 535.643


 48%|████▊     | 96/200 [00:15<00:15,  6.92it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 97, Train perplexity: 506


                                                

Epoch: 97, Dev perplexity 535.608


 48%|████▊     | 97/200 [00:15<00:14,  6.92it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.14it/s]
                                                

Epoch: 98, Train perplexity: 501


                                                

Epoch: 98, Dev perplexity 532.421


 49%|████▉     | 98/200 [00:15<00:14,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.45it/s]
                                                

Epoch: 99, Train perplexity: 497


                                                

Epoch: 99, Dev perplexity 529.767


 50%|████▉     | 99/200 [00:15<00:14,  6.74it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                

Epoch: 100, Train perplexity: 497


                                                

Epoch: 100, Dev perplexity 536.403


 50%|█████     | 100/200 [00:15<00:14,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 101, Train perplexity: 496


                                                 

Epoch: 101, Dev perplexity 524.796


 50%|█████     | 101/200 [00:15<00:14,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 102, Train perplexity: 489


                                                 

Epoch: 102, Dev perplexity 521.789


 51%|█████     | 102/200 [00:15<00:14,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 103, Train perplexity: 488


                                                 

Epoch: 103, Dev perplexity 518.818


 52%|█████▏    | 103/200 [00:16<00:14,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 104, Train perplexity: 482


                                                 

Epoch: 104, Dev perplexity 514.057


 52%|█████▏    | 104/200 [00:16<00:13,  6.86it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 105, Train perplexity: 484


                                                 

Epoch: 105, Dev perplexity 512.745


 52%|█████▎    | 105/200 [00:16<00:13,  6.92it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.13it/s]
                                                 

Epoch: 106, Train perplexity: 477


                                                 

Epoch: 106, Dev perplexity 509.479


 53%|█████▎    | 106/200 [00:16<00:13,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.13it/s]
                                                 

Epoch: 107, Train perplexity: 478


                                                 

Epoch: 107, Dev perplexity 505.118


 54%|█████▎    | 107/200 [00:16<00:13,  6.70it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.55it/s]
                                                 

Epoch: 108, Train perplexity: 479


                                                 

Epoch: 108, Dev perplexity 510.423


 54%|█████▍    | 108/200 [00:16<00:13,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.55it/s]
                                                 

Epoch: 109, Train perplexity: 474


                                                 

Epoch: 109, Dev perplexity 502.468


 55%|█████▍    | 109/200 [00:16<00:13,  6.70it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 110, Train perplexity: 474


                                                 

Epoch: 110, Dev perplexity 503.812


 55%|█████▌    | 110/200 [00:17<00:13,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 111, Train perplexity: 472


                                                 

Epoch: 111, Dev perplexity 498.623


 56%|█████▌    | 111/200 [00:17<00:13,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 112, Train perplexity: 469


                                                 

Epoch: 112, Dev perplexity 497.643


 56%|█████▌    | 112/200 [00:17<00:12,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 113, Train perplexity: 467


                                                 

Epoch: 113, Dev perplexity 493.496


 56%|█████▋    | 113/200 [00:17<00:12,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.31it/s]
                                                 

Epoch: 114, Train perplexity: 468


                                                 

Epoch: 114, Dev perplexity 496.422


 57%|█████▋    | 114/200 [00:17<00:12,  6.77it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 115, Train perplexity: 465


                                                 

Epoch: 115, Dev perplexity 489.338


 57%|█████▊    | 115/200 [00:17<00:12,  6.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 116, Train perplexity: 464


                                                 

Epoch: 116, Dev perplexity 489.241


 58%|█████▊    | 116/200 [00:17<00:12,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 117, Train perplexity: 459


                                                 

Epoch: 117, Dev perplexity 487.650


 58%|█████▊    | 117/200 [00:18<00:12,  6.87it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.37it/s]
                                                 

Epoch: 118, Train perplexity: 464


                                                 

Epoch: 118, Dev perplexity 492.987


 59%|█████▉    | 118/200 [00:18<00:12,  6.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.20it/s]
                                                 

Epoch: 119, Train perplexity: 463


                                                 

Epoch: 119, Dev perplexity 489.964


 60%|█████▉    | 119/200 [00:18<00:11,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 120, Train perplexity: 460


                                                 

Epoch: 120, Dev perplexity 485.814


 60%|██████    | 120/200 [00:18<00:11,  6.77it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 121, Train perplexity: 459


                                                 

Epoch: 121, Dev perplexity 484.853


 60%|██████    | 121/200 [00:18<00:11,  6.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 122, Train perplexity: 459


                                                 

Epoch: 122, Dev perplexity 482.194


 61%|██████    | 122/200 [00:18<00:11,  6.87it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.13it/s]
                                                 

Epoch: 123, Train perplexity: 460


                                                 

Epoch: 123, Dev perplexity 480.848


 62%|██████▏   | 123/200 [00:18<00:11,  6.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 124, Train perplexity: 458


                                                 

Epoch: 124, Dev perplexity 484.586


 62%|██████▏   | 124/200 [00:19<00:11,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 125, Train perplexity: 455


                                                 

Epoch: 125, Dev perplexity 478.178


 62%|██████▎   | 125/200 [00:19<00:11,  6.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 126, Train perplexity: 454


                                                 

Epoch: 126, Dev perplexity 484.487


 63%|██████▎   | 126/200 [00:19<00:10,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 127, Train perplexity: 455


                                                 

Epoch: 127, Dev perplexity 476.801


 64%|██████▎   | 127/200 [00:19<00:10,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 128, Train perplexity: 456


                                                 

Epoch: 128, Dev perplexity 476.221


 64%|██████▍   | 128/200 [00:19<00:10,  6.91it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.83it/s]
                                                 

Epoch: 129, Train perplexity: 453


                                                 

Epoch: 129, Dev perplexity 475.066


 64%|██████▍   | 129/200 [00:19<00:10,  6.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 130, Train perplexity: 452


                                                 

Epoch: 130, Dev perplexity 473.104


 65%|██████▌   | 130/200 [00:20<00:10,  6.89it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 131, Train perplexity: 450


                                                 

Epoch: 131, Dev perplexity 473.942


 66%|██████▌   | 131/200 [00:20<00:10,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.94it/s]
                                                 

Epoch: 132, Train perplexity: 451


                                                 

Epoch: 132, Dev perplexity 471.898


 66%|██████▌   | 132/200 [00:20<00:09,  6.86it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 133, Train perplexity: 450


                                                 

Epoch: 133, Dev perplexity 474.661


 66%|██████▋   | 133/200 [00:20<00:09,  6.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 134, Train perplexity: 451


                                                 

Epoch: 134, Dev perplexity 471.064


 67%|██████▋   | 134/200 [00:20<00:09,  6.84it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 135, Train perplexity: 447


                                                 

Epoch: 135, Dev perplexity 475.047


 68%|██████▊   | 135/200 [00:20<00:09,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.79it/s]
                                                 

Epoch: 136, Train perplexity: 448


                                                 

Epoch: 136, Dev perplexity 473.412


 68%|██████▊   | 136/200 [00:20<00:09,  6.74it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 137, Train perplexity: 449


                                                 

Epoch: 137, Dev perplexity 471.959


 68%|██████▊   | 137/200 [00:21<00:09,  6.72it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 138, Train perplexity: 447


                                                 

Epoch: 138, Dev perplexity 470.258


 69%|██████▉   | 138/200 [00:21<00:09,  6.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 139, Train perplexity: 449


                                                 

Epoch: 139, Dev perplexity 468.544


 70%|██████▉   | 139/200 [00:21<00:09,  6.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 140, Train perplexity: 449


                                                 

Epoch: 140, Dev perplexity 467.032


 70%|███████   | 140/200 [00:21<00:08,  6.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.48it/s]
                                                 

Epoch: 141, Train perplexity: 446


                                                 

Epoch: 141, Dev perplexity 469.995


 70%|███████   | 141/200 [00:21<00:08,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.19it/s]
                                                 

Epoch: 142, Train perplexity: 448


                                                 

Epoch: 142, Dev perplexity 467.418


 71%|███████   | 142/200 [00:21<00:08,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.19it/s]
                                                 

Epoch: 143, Train perplexity: 449


                                                 

Epoch: 143, Dev perplexity 465.554


 72%|███████▏  | 143/200 [00:21<00:08,  6.68it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.82it/s]
                                                 

Epoch: 144, Train perplexity: 445


                                                 

Epoch: 144, Dev perplexity 469.888


 72%|███████▏  | 144/200 [00:22<00:08,  6.69it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.56it/s]
                                                 

Epoch: 145, Train perplexity: 446


                                                 

Epoch: 145, Dev perplexity 466.142


 72%|███████▎  | 145/200 [00:22<00:08,  6.76it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 146, Train perplexity: 446


                                                 

Epoch: 146, Dev perplexity 468.212


 73%|███████▎  | 146/200 [00:22<00:07,  6.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 147, Train perplexity: 444


                                                 

Epoch: 147, Dev perplexity 466.059


 74%|███████▎  | 147/200 [00:22<00:07,  6.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 148, Train perplexity: 442


                                                 

Epoch: 148, Dev perplexity 465.693


 74%|███████▍  | 148/200 [00:22<00:07,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 149, Train perplexity: 443


                                                 

Epoch: 149, Dev perplexity 466.991


 74%|███████▍  | 149/200 [00:22<00:07,  6.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 150, Train perplexity: 442


                                                 

Epoch: 150, Dev perplexity 463.789


 75%|███████▌  | 150/200 [00:22<00:07,  6.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 151, Train perplexity: 440


                                                 

Epoch: 151, Dev perplexity 461.856


 76%|███████▌  | 151/200 [00:23<00:07,  6.82it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.70it/s]
                                                 

Epoch: 152, Train perplexity: 440


                                                 

Epoch: 152, Dev perplexity 460.655


 76%|███████▌  | 152/200 [00:23<00:07,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.32it/s]
                                                 

Epoch: 153, Train perplexity: 439


                                                 

Epoch: 153, Dev perplexity 461.194


 76%|███████▋  | 153/200 [00:23<00:06,  6.77it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 154, Train perplexity: 440


                                                 

Epoch: 154, Dev perplexity 461.085


 77%|███████▋  | 154/200 [00:23<00:06,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 35.48it/s]
                                                 

Epoch: 155, Train perplexity: 440


                                                 

Epoch: 155, Dev perplexity 460.231


 78%|███████▊  | 155/200 [00:23<00:06,  6.59it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.48it/s]
                                                 

Epoch: 156, Train perplexity: 439


                                                 

Epoch: 156, Dev perplexity 463.755


 78%|███████▊  | 156/200 [00:23<00:06,  6.58it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 157, Train perplexity: 442


                                                 

Epoch: 157, Dev perplexity 464.356


 78%|███████▊  | 157/200 [00:24<00:06,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 158, Train perplexity: 437


                                                 

Epoch: 158, Dev perplexity 459.803


 79%|███████▉  | 158/200 [00:24<00:06,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 159, Train perplexity: 437


                                                 

Epoch: 159, Dev perplexity 460.470


 80%|███████▉  | 159/200 [00:24<00:06,  6.72it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.56it/s]
                                                 

Epoch: 160, Train perplexity: 438


                                                 

Epoch: 160, Dev perplexity 460.792


 80%|████████  | 160/200 [00:24<00:05,  6.74it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.20it/s]
                                                 

Epoch: 161, Train perplexity: 436


                                                 

Epoch: 161, Dev perplexity 463.305


 80%|████████  | 161/200 [00:24<00:05,  6.64it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 162, Train perplexity: 440


                                                 

Epoch: 162, Dev perplexity 462.008


 81%|████████  | 162/200 [00:24<00:05,  6.72it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 163, Train perplexity: 442


                                                 

Epoch: 163, Dev perplexity 456.570


 82%|████████▏ | 163/200 [00:24<00:05,  6.67it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 164, Train perplexity: 440


                                                 

Epoch: 164, Dev perplexity 460.468


 82%|████████▏ | 164/200 [00:25<00:05,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 165, Train perplexity: 436


                                                 

Epoch: 165, Dev perplexity 467.669


 82%|████████▎ | 165/200 [00:25<00:05,  6.77it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.71it/s]
                                                 

Epoch: 166, Train perplexity: 442


                                                 

Epoch: 166, Dev perplexity 467.462


 83%|████████▎ | 166/200 [00:25<00:05,  6.66it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 35.18it/s]
                                                 

Epoch: 167, Train perplexity: 440


                                                 

Epoch: 167, Dev perplexity 461.063


 84%|████████▎ | 167/200 [00:25<00:05,  6.53it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.70it/s]
                                                 

Epoch: 168, Train perplexity: 438


                                                 

Epoch: 168, Dev perplexity 462.923


 84%|████████▍ | 168/200 [00:25<00:04,  6.53it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 169, Train perplexity: 441


                                                 

Epoch: 169, Dev perplexity 462.741


 84%|████████▍ | 169/200 [00:25<00:04,  6.60it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.84it/s]
                                                 

Epoch: 170, Train perplexity: 437


                                                 

Epoch: 170, Dev perplexity 457.189


 85%|████████▌ | 170/200 [00:25<00:04,  6.51it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 171, Train perplexity: 435


                                                 

Epoch: 171, Dev perplexity 456.101


 86%|████████▌ | 171/200 [00:26<00:04,  6.56it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 172, Train perplexity: 434


                                                 

Epoch: 172, Dev perplexity 456.502


 86%|████████▌ | 172/200 [00:26<00:04,  6.63it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 173, Train perplexity: 433


                                                 

Epoch: 173, Dev perplexity 453.428


 86%|████████▋ | 173/200 [00:26<00:04,  6.60it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.93it/s]
                                                 

Epoch: 174, Train perplexity: 432


                                                 

Epoch: 174, Dev perplexity 460.140


 87%|████████▋ | 174/200 [00:26<00:03,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 175, Train perplexity: 438


                                                 

Epoch: 175, Dev perplexity 459.000


 88%|████████▊ | 175/200 [00:26<00:03,  6.66it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.31it/s]
                                                 

Epoch: 176, Train perplexity: 433


                                                 

Epoch: 176, Dev perplexity 453.060


 88%|████████▊ | 176/200 [00:26<00:03,  6.51it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.31it/s]
                                                 

Epoch: 177, Train perplexity: 434


                                                 

Epoch: 177, Dev perplexity 457.626


 88%|████████▊ | 177/200 [00:27<00:03,  6.47it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.32it/s]
                                                 

Epoch: 178, Train perplexity: 432


                                                 

Epoch: 178, Dev perplexity 459.764


 89%|████████▉ | 178/200 [00:27<00:03,  6.49it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.13it/s]
                                                 

Epoch: 179, Train perplexity: 434


                                                 

Epoch: 179, Dev perplexity 457.180


 90%|████████▉ | 179/200 [00:27<00:03,  6.53it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.92it/s]
                                                 

Epoch: 180, Train perplexity: 437


                                                 

Epoch: 180, Dev perplexity 458.234


 90%|█████████ | 180/200 [00:27<00:03,  6.46it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 181, Train perplexity: 433


                                                 

Epoch: 181, Dev perplexity 452.311


 90%|█████████ | 181/200 [00:27<00:02,  6.55it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 182, Train perplexity: 431


                                                 

Epoch: 182, Dev perplexity 457.664


 91%|█████████ | 182/200 [00:27<00:02,  6.67it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 183, Train perplexity: 433


                                                 

Epoch: 183, Dev perplexity 457.339


 92%|█████████▏| 183/200 [00:27<00:02,  6.53it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 184, Train perplexity: 433


                                                 

Epoch: 184, Dev perplexity 452.372


 92%|█████████▏| 184/200 [00:28<00:02,  6.59it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 185, Train perplexity: 431


                                                 

Epoch: 185, Dev perplexity 457.529


 92%|█████████▎| 185/200 [00:28<00:02,  6.59it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.84it/s]
                                                 

Epoch: 186, Train perplexity: 429


                                                 

Epoch: 186, Dev perplexity 455.479


 93%|█████████▎| 186/200 [00:28<00:02,  6.59it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 187, Train perplexity: 434


                                                 

Epoch: 187, Dev perplexity 459.986


 94%|█████████▎| 187/200 [00:28<00:01,  6.67it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 188, Train perplexity: 432


                                                 

Epoch: 188, Dev perplexity 454.381


 94%|█████████▍| 188/200 [00:28<00:01,  6.69it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.83it/s]
                                                 

Epoch: 189, Train perplexity: 431


                                                 

Epoch: 189, Dev perplexity 455.952


 94%|█████████▍| 189/200 [00:28<00:01,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 190, Train perplexity: 430


                                                 

Epoch: 190, Dev perplexity 455.356


 95%|█████████▌| 190/200 [00:29<00:01,  6.67it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.31it/s]
                                                 

Epoch: 191, Train perplexity: 435


                                                 

Epoch: 191, Dev perplexity 453.699


 96%|█████████▌| 191/200 [00:29<00:01,  6.61it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 192, Train perplexity: 431


                                                 

Epoch: 192, Dev perplexity 449.229


 96%|█████████▌| 192/200 [00:29<00:01,  6.71it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 193, Train perplexity: 431


                                                 

Epoch: 193, Dev perplexity 452.526


 96%|█████████▋| 193/200 [00:29<00:01,  6.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 37.13it/s]
                                                 

Epoch: 194, Train perplexity: 426


                                                 

Epoch: 194, Dev perplexity 450.096


 97%|█████████▋| 194/200 [00:29<00:00,  6.68it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.19it/s]
                                                 

Epoch: 195, Train perplexity: 428


                                                 

Epoch: 195, Dev perplexity 451.569


 98%|█████████▊| 195/200 [00:29<00:00,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 196, Train perplexity: 428


                                                 

Epoch: 196, Dev perplexity 453.517


 98%|█████████▊| 196/200 [00:29<00:00,  6.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 36.13it/s]
                                                 

Epoch: 197, Train perplexity: 428


                                                 

Epoch: 197, Dev perplexity 454.100


 98%|█████████▊| 197/200 [00:30<00:00,  6.57it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 38.56it/s]
                                                 

Epoch: 198, Train perplexity: 430


                                                 

Epoch: 198, Dev perplexity 450.855


 99%|█████████▉| 198/200 [00:30<00:00,  6.53it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
 80%|████████  | 4/5 [00:00<00:00, 39.31it/s]
                                                 

Epoch: 199, Train perplexity: 429


                                                 

Epoch: 199, Dev perplexity 449.192


100%|█████████▉| 199/200 [00:30<00:00,  6.51it/s]
  0%|          | 0/5 [00:00<?, ?it/s]
                                                 

Epoch: 200, Train perplexity: 427


                                                 

Epoch: 200, Dev perplexity 448.182


100%|██████████| 200/200 [00:30<00:00,  6.60it/s]


Test perplexity 525.144
Input the first word or press Ctrl-C to exit: 鸟
鸟鼓聚身蝶，海气偃曲离，江寻有清涛渝。华何尘虎纷，中暮中但。渐无溃东。自醒旧交，开风人。论开头臣。未不花，凄歌四，红雪地，九权。毫贵妃我来，万簟锁循屏。梦弦彩边。有衣相，为风自，俗却倾，桃流久。闺日开吾，却受裂四，北山流虚。河海期不，来长席云疏。天道华头白，上识浴来。岁淹不使知，啼帆长梁邴。忽纷难天时何，。
Input the first word or press Ctrl-C to exit: 云
云颓读化生，大阴厥酒但，回流惟热，恨来斡之。石晓日来，之高羁皆，龙暮芙津。风梅一指，上自肯，雄将雪处。岸生相增骅，过日何上华，梦边过。悍掩无犹，秦流与宾。
Input the first word or press Ctrl-C to exit: 水
水商逃得海，自舜主华态。期自高稚奋。首来何鬼，缨陵勇行，宫相飞飞，天收亭。开阳头不，客具五时不。饮尘去家盘，朝开是，人白粲初鲸，何野焉流足火省。
Input the first word or press Ctrl-C to exit: 
 is not in vocabulary, choose by random.
兮人川至家，雾东人花。
Input the first word or press Ctrl-C to exit: 
 is not in vocabulary, choose by random.
觊蒙动父开作。云回马高不，从主浊陵。来亦众乐，曲有。
Input the first word or press Ctrl-C to exit: 
 is not in vocabulary, choose by random.
侍严展云古，牛路长歌，临兵识母，休秋不切，地去有，激知流，归风中。地城马先，家道山，天柳。西但花曲。东白军，手后极，千长都，孙更何，所人白
Input the first word or press Ctrl-C to exit: 
 is not in vocabulary, choose by random.
称策华，连所万，可江云争，泥汝蒯越混。飞腾冥事邑，台白举不荆。靖草河门，天钟中，香清海臣。岸点徒看，沾海翅。远回红衣，几凫，楚来来