In [93]:
import string
import random
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import math

In [94]:
all_chars       = string.printable
n_chars         = len(all_chars)
file            = open('../Data/shakespeare.txt').read()
file_len        = len(file)

print('Length of file: {}'.format(file_len))
print('All possible characters: {}'.format(all_chars))
print('Number of all possible characters: {}'.format(n_chars))

Length of file: 1115394
All possible characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

Number of all possible characters: 100


In [95]:
# Get a random sequence of the Shakespeare dataset
def get_random_seq(seq_len=128):
    start_index = random.randint(0, file_len - seq_len)
    end_index = start_index + seq_len + 1
    return file[start_index:end_index]

# Convert the sequence to one-hot tensor
def seq_to_onehot(seq):
    tensor = torch.zeros(len(seq), n_chars,dtype=torch.long) 
    for t, char in enumerate(seq):
        index = all_chars.index(char)
        tensor[t][index] = 1
    print("Tensor shape",tensor.shape)
    return tensor

# Convert the sequence to index tensor
def seq_to_index(seq):
    tensor = torch.zeros(len(seq), dtype=torch.long)
    for t, char in enumerate(seq):
        tensor[t] = all_chars.index(char)
    return tensor

# Sample a mini-batch including input tensor and target tensor
def get_input_and_target(seq_len=128):
    seq = get_random_seq(seq_len)
    input_tensor = seq_to_onehot(seq[:-1])  # Input is represented in one-hot
    # print("Shape after",input_tensor.shape)
    target_tensor = seq_to_index(seq[1:])   # Target is represented in index
    return input_tensor, target_tensor  # Add batch dimension


In [96]:
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, num_layers, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.pos_encoder = nn.Parameter(torch.zeros(1, 100, embed_size))  # Positional encoding
        encoder_layers = nn.TransformerEncoderLayer(embed_size, num_heads, dim_feedforward=512, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def forward(self, src):
        src = self.embed(src) + self.pos_encoder[:, :src.size(1), :]
        src = self.transformer_encoder(src)
        output = self.fc_out(src)
        return output

In [97]:
embed_size = 32
num_heads = 2
num_layers = 2
dropout = 0.1

model = TransformerModel(n_chars, embed_size, num_heads, num_layers, dropout)



In [98]:
num_epochs = 5
lr = 0.005
all_losses = []
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    input_tensor, target_tensor = get_input_and_target()
    
    # output = model(input_tensor)
    output = output.view(-1, n_chars)  # [batch_size * seq_len, n_chars]
    target_tensor = target_tensor.view(-1)  # [batch_size * seq_len]
    loss = criterion(output.view(-1, n_chars), target_tensor)
    
    all_losses.append(loss)

    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

Tensor shape torch.Size([128, 100])


ValueError: Expected input batch_size (12800) to match target batch_size (128).

In [None]:
ouput