In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from torch.utils.tensorboard import SummaryWriter

if(not torch.cuda.is_available()):
    raise Exception('cuda not available')

DEVICE = torch.device('cuda')

In [2]:
# Load the PTB dataset
text = open("ptb.train.txt", "r").read() # Replace this with the actual path to the PTB dataset
text = text.split()

# Prepare the vocabulary and create mapping from words to integers
vocab = set(text)
print('len(vocab)', len(vocab))
word_to_int = {word: ii for ii, word in enumerate(vocab)}
int_to_word = {ii: word for ii, word in enumerate(vocab)}

# Convert the text data to integer form
encoded = np.array([word_to_int[word] for word in text])

len(vocab) 9999


In [3]:
def decode(tokens):
    return [int_to_word[token] for token in tokens]

In [4]:
# Split the encoded text into chunks of length `seq_length`
seq_length = 64
data = []
target = []
for i in range(0, len(encoded) - seq_length):
    data.append(encoded[i: i + seq_length])
    target.append(encoded[i + seq_length])

In [11]:
# Convert the data and target to tensors
data = torch.tensor(data, device=DEVICE).long()
target = torch.tensor(target, device=DEVICE).long()

print(data.shape)
print(target.shape)

# Create a TensorDataset from data and target tensors
dataset = TensorDataset(data, target)

# Create a DataLoader from the TensorDataset
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, drop_last=True)

  data = torch.tensor(data, device=DEVICE).long()
  target = torch.tensor(target, device=DEVICE).long()


torch.Size([887457, 64])
torch.Size([887457])


In [13]:
# Define the language model
class LM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, n_layers):
        super(LM, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        y = self.embedding(x)
        y, _ = self.lstm(y, hidden)
        y = y[:,-1,:]
        y = self.fc(y)
        return y, _
    
    def init_hidden(self, batch_size):
        return (torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(DEVICE),
                torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(DEVICE))

# Initialize the language model
model = LM(len(vocab), 128, 512, 2)
model.cuda()

writer = SummaryWriter('runs/ptb')

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train the language model
n_epochs = 300
for epoch in range(n_epochs):
    hidden = model.init_hidden(32)
    for i, batch in enumerate(dataloader):
        x, y = batch
        optimizer.zero_grad()
        output, _ = model(x, hidden)
        
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{n_epochs}], Step [{i + 1}/{len(dataloader)}], Loss: {loss.item():.4f}')

    writer.add_scalar('Loss/train', loss.item(), epoch)
    torch.save(model, f'model_epoch_{epoch}.pt')


Epoch [1/300], Step [100/27733], Loss: 7.8342
Epoch [1/300], Step [200/27733], Loss: 7.5727
Epoch [1/300], Step [300/27733], Loss: 6.6153
Epoch [1/300], Step [400/27733], Loss: 6.7830
Epoch [1/300], Step [500/27733], Loss: 7.5079
Epoch [1/300], Step [600/27733], Loss: 6.0225
Epoch [1/300], Step [700/27733], Loss: 7.6855
Epoch [1/300], Step [800/27733], Loss: 7.6249
Epoch [1/300], Step [900/27733], Loss: 6.1625
Epoch [1/300], Step [1000/27733], Loss: 6.5600
Epoch [1/300], Step [1100/27733], Loss: 6.5568
Epoch [1/300], Step [1200/27733], Loss: 5.2681
Epoch [1/300], Step [1300/27733], Loss: 6.8257
Epoch [1/300], Step [1400/27733], Loss: 6.2989
Epoch [1/300], Step [1500/27733], Loss: 6.8791
Epoch [1/300], Step [1600/27733], Loss: 5.8289
Epoch [1/300], Step [1700/27733], Loss: 6.8065
Epoch [1/300], Step [1800/27733], Loss: 5.7066
Epoch [1/300], Step [1900/27733], Loss: 6.9660
Epoch [1/300], Step [2000/27733], Loss: 6.5602
Epoch [1/300], Step [2100/27733], Loss: 6.6538
Epoch [1/300], Step [2

KeyboardInterrupt: 

In [44]:
model = torch.load('model_epoch_110.pt')
model.eval()

hidden = model.init_hidden(1)

sentence = 'The best way'.lower()

tokens = [word_to_int[word] for word in sentence.split()]
tokens = torch.tensor(tokens, device=DEVICE).long().view(1, -1)

for i in range(100):
    output, _ = model(tokens, hidden)
    prediction = output.argmax(dim=1)
    tokens = torch.cat((tokens, prediction.view(1, -1)), dim=1)

print(' '.join(decode(tokens[0].tolist())))



the best way of exporters and freeze a registered ceiling but the rush is likely to be far less <unk> is projected to add to N hours of gasoline supplies to the housing and effect of young & rubicam said the nation 's total could n't be reached for comment the transportation department said it is difficult to secure within three years to repair programs and <unk> the centers for <unk> programs to address the <unk> of the <unk> <unk> coastal development and <unk> the project is already <unk> by the mexican nuclear refinery houston property is likely to be transcanada pipelines the


In [45]:
model = torch.load('model_epoch_110.pt')
model.cpu()
torch.save(model, 'model_epoch_110_cpu.pt')