## Install

In [0]:
!pip3 install torch torchvision numpy

## Imports

In [0]:
from matplotlib import pyplot as plt
import numpy as np

import torch as th
from torch import nn

from pytorch.examples.yunjey import ptb_data

## Config

In [23]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = th.device('cuda' if th.cuda.is_available() else 'cpu')
print(f'Using {device}')

# Seed PyTorch's random number generators so weight initialisation and
# text sampling are repeatable after Restart & Run All.
seed = 0
th.manual_seed(seed)

# Model size.
embed_size = 128    # width of each word embedding
hidden_size = 1024  # width of the LSTM hidden state
num_layers = 1      # number of stacked LSTM layers

# Training / sampling schedule.
num_epochs = 5
num_samples = 1000  # number of words to sample during eval.
batch_size = 20
seq_length = 30     # tokens per truncated-backprop window
learning_rate = 0.002

Using cuda


## Dataset (Penn Treebank)

In [0]:
# Load the Penn Treebank training split as a tensor of word ids. The
# recorded output below shows `ids` with shape (batch_size, tokens_per_row).
corpus = ptb_data.Corpus()
ids = corpus.get_data('pytorch/examples/yunjey/ptb_data/train.txt', batch_size)

# Read the vocabulary size after loading (presumably get_data fills the
# dictionary -- confirm against ptb_data).
vocab_size = len(corpus.dictionary)

# Number of seq_length-sized windows that fit along the token axis.
num_batches = ids.shape[1] // seq_length

In [20]:
# Quick sanity check of the loaded data: tensor shape, vocabulary size and
# number of training windows, one per line.
for value in (ids.shape, vocab_size, num_batches):
  print(value)

torch.Size([20, 46479])
10000
1549


## Model (RNN LM)

In [0]:
class RNNLM(nn.Module):
  """LSTM language model: word embedding -> LSTM -> linear decoder.

  Args:
    vocab_size: number of distinct word ids; sizes both the embedding table
      and the output layer.
    embed_size: dimensionality of each word embedding.
    hidden_size: number of features in the LSTM hidden state.
    num_layers: number of stacked LSTM layers.
  """

  def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
    super().__init__()
    self.embed = nn.Embedding(vocab_size, embed_size)
    # batch_first=True: inputs and outputs are laid out (batch, seq, feature).
    self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, vocab_size)

  def forward(self, x, h=None):
    """Compute next-word logits for every position of every sequence.

    Args:
      x: integer word ids of shape (batch, seq_length).
      h: optional `(h_0, c_0)` pair of LSTM states, each of shape
        (num_layers, batch, hidden_size). When omitted, nn.LSTM starts from
        zero states.

    Returns:
      A pair `(logits, (h_n, c_n))` where `logits` has shape
      (batch * seq_length, vocab_size) and `(h_n, c_n)` are the final LSTM
      states, ready to be fed into the next call.
    """
    # Embed word ids to vectors.
    embedded = self.embed(x)

    # Forward propagate LSTM.
    out, (h_n, c_n) = self.lstm(embedded, h)

    # Flatten batch and time so the decoder sees one row per token:
    # (batch, seq_length, hidden_size) -> (batch * seq_length, hidden_size).
    out = out.reshape(-1, out.size(-1))

    # Decode the hidden states of all time steps into vocabulary logits.
    logits = self.fc(out)
    return logits, (h_n, c_n)

# Build the language model from the configured sizes, then move its
# parameters to the selected device.
model = RNNLM(
  vocab_size=vocab_size,
  embed_size=embed_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
)
model = model.to(device)

## Train

In [22]:
# Training objective: cross-entropy between predicted logits and target ids.
loss_fn = nn.CrossEntropyLoss()

# Adam over every model parameter, using the configured learning rate.
optimizer = th.optim.Adam(params=model.parameters(), lr=learning_rate)

# Truncated backpropagation.
def detach(states):
  """Return a list with a detached copy of every tensor in `states`.

  Detaching cuts each state off from the autograd graph that produced it, so
  gradients from later windows do not flow back into earlier ones.
  """
  detached = []
  for state in states:
    detached.append(state.detach())
  return detached

# Train.
max_grad_norm = 0.5  # gradients are rescaled so their total norm stays below this

# Make sure the model is in training mode even if an evaluation cell ran first.
model.train()

for epoch in range(num_epochs):
  # Start every epoch from zero hidden and cell states, allocated directly
  # on the target device.
  states = (th.zeros(num_layers, batch_size, hidden_size, device=device),
            th.zeros(num_layers, batch_size, hidden_size, device=device))

  # Window start offsets along the token axis. Stopping seq_length short of
  # the end guarantees the shifted-by-one target window is always full.
  window_starts = range(0, ids.size(1) - seq_length, seq_length)

  # `step` is the window index taken straight from enumerate, so it is
  # correct for any seq_length (the arithmetic form drifts for seq_length 1).
  for step, i in enumerate(window_starts):
    inputs = ids[:, i:i+seq_length].to(device)
    # Targets are the inputs shifted one token to the right.
    targets = ids[:, i+1:i+1+seq_length].to(device)

    # Forward. Carry the state values over from the previous window but cut
    # the autograd graph so backprop is truncated to this window.
    states = detach(states)
    outputs, states = model(inputs, states)
    loss = loss_fn(outputs, targets.reshape(-1))

    # Backward
    optimizer.zero_grad()
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    optimizer.step()

    if step % 100 == 0:
      # Read the scalar once; each .item() call copies the loss back to the host.
      loss_value = loss.item()
      print(f'Epoch [{epoch+1}/{num_epochs}], Step [{step}/{num_batches}], '
            f'Loss: {loss_value:.4}, '
            f'Perplexity: {np.exp(loss_value):5.2f}')

Epoch [1/5], Step [0/1549], Loss: 9.21, Perplexity: 9995.55
Epoch [1/5], Step [100/1549], Loss: 5.984, Perplexity: 397.11
Epoch [1/5], Step [200/1549], Loss: 5.942, Perplexity: 380.69
Epoch [1/5], Step [300/1549], Loss: 5.753, Perplexity: 315.04
Epoch [1/5], Step [400/1549], Loss: 5.707, Perplexity: 300.88
Epoch [1/5], Step [500/1549], Loss: 5.152, Perplexity: 172.82
Epoch [1/5], Step [600/1549], Loss: 5.138, Perplexity: 170.43
Epoch [1/5], Step [700/1549], Loss: 5.327, Perplexity: 205.77
Epoch [1/5], Step [800/1549], Loss: 5.189, Perplexity: 179.36
Epoch [1/5], Step [900/1549], Loss: 5.057, Perplexity: 157.07
Epoch [1/5], Step [1000/1549], Loss: 5.107, Perplexity: 165.14
Epoch [1/5], Step [1100/1549], Loss: 5.321, Perplexity: 204.52
Epoch [1/5], Step [1200/1549], Loss: 5.191, Perplexity: 179.66
Epoch [1/5], Step [1300/1549], Loss: 5.084, Perplexity: 161.35
Epoch [1/5], Step [1400/1549], Loss: 4.873, Perplexity: 130.69
Epoch [1/5], Step [1500/1549], Loss: 5.193, Perplexity: 180.05
Epoc

Epoch [4/5], Step [800/1549], Loss: 3.28, Perplexity: 26.57
Epoch [4/5], Step [900/1549], Loss: 3.011, Perplexity: 20.30
Epoch [4/5], Step [1000/1549], Loss: 3.131, Perplexity: 22.89
Epoch [4/5], Step [1100/1549], Loss: 3.171, Perplexity: 23.83
Epoch [4/5], Step [1200/1549], Loss: 3.273, Perplexity: 26.39
Epoch [4/5], Step [1300/1549], Loss: 3.004, Perplexity: 20.16
Epoch [4/5], Step [1400/1549], Loss: 2.718, Perplexity: 15.15
Epoch [4/5], Step [1500/1549], Loss: 3.189, Perplexity: 24.26
Epoch [5/5], Step [0/1549], Loss: 3.078, Perplexity: 21.71
Epoch [5/5], Step [100/1549], Loss: 2.848, Perplexity: 17.25
Epoch [5/5], Step [200/1549], Loss: 3.133, Perplexity: 22.94
Epoch [5/5], Step [300/1549], Loss: 3.111, Perplexity: 22.45
Epoch [5/5], Step [400/1549], Loss: 3.035, Perplexity: 20.81
Epoch [5/5], Step [500/1549], Loss: 2.602, Perplexity: 13.49
Epoch [5/5], Step [600/1549], Loss: 3.064, Perplexity: 21.41
Epoch [5/5], Step [700/1549], Loss: 2.906, Perplexity: 18.28
Epoch [5/5], Step [80

## Test

In [24]:
# Collect the sampled pieces in a list and join once at the end instead of
# repeatedly re-allocating a growing string.
sampled_pieces = []

# Inference mode (no effect on layers without dropout/batch-norm; set for safety).
model.eval()

with th.no_grad():
  # Set initial hidden and cell states for a single sequence (batch of 1),
  # allocated directly on the target device.
  state = (th.zeros(num_layers, 1, hidden_size, device=device),
           th.zeros(num_layers, 1, hidden_size, device=device))

  # Initialize by selecting one word id uniformly at random.
  prob = th.ones(vocab_size)
  inputs = th.multinomial(prob, num_samples=1).unsqueeze(1).to(device)

  for _ in range(num_samples):
    # Forward propagate RNN; `output` holds one row of vocabulary logits.
    output, state = model(inputs, state)

    # Sample a word id. softmax gives the same distribution as normalising
    # exp(logits) but cannot overflow to inf, which multinomial rejects.
    prob = th.softmax(output, dim=1)
    word_id = th.multinomial(prob, num_samples=1).item()

    # Fill input with sampled word id for next time step.
    inputs.fill_(word_id)

    # End-of-sentence markers become line breaks; other words get a space.
    word = corpus.dictionary.idx2word[word_id]
    sampled_pieces.append('\n' if word == '<eos>' else word + ' ')

sampled_text = ''.join(sampled_pieces)
print(sampled_text)

of portfolios today 
<unk> by physician <unk> a working small with new <unk> a greatest bank to head of a <unk> N portugal offset in the early years 
federal 's a <unk> $ N billion robot spacecraft fund opens plc 's equal employment since the old guard hired the <unk> 
services a woman assigned at a department of health and human services from an adequate independent agency began investigating makers <unk> in december 
the patients wo n't be solved or even talked by the <unk> sea containers 
he says a lot could act from the use 
the arab league pledged the west to american express and robert goldberg would be able to repay the company 's N guilders launch a series of communications inc. in connection with the transactions future and <unk> management co. of the current junk-bond market for sometime corp. it will become a presentation this year 
mr. stone owns never which hope that <unk> period of primary property duties to drop as stock investments in <unk> fema 
but as banks seek profi

## Save model

In [0]:
# Persist only the learned parameters (the state dict), not the module object.
th.save(obj=model.state_dict(), f='/tmp/rnnlm_ptb.ckpt')