# Text generation with RNN

<img src="./img/text_generation.png" alt="text_generation.png" style="width: 600px;"/>

In [1]:
import torch
import torch.nn as nn

## RNN model to predict next character

In [3]:
data = "The cat sat on the mat"
chars = list(set(data))

In [4]:
chars

['n', 'e', 's', 'h', 'a', 'c', 'm', ' ', 't', 'o', 'T']

In [5]:
char_to_idx = {char:idx for idx, char in enumerate(chars)}
idx_to_char = {idx:char for idx, char in enumerate(chars)}

In [14]:
inputs = [char_to_idx[ch] for ch in data[:-1]]
targets = [char_to_idx[ch] for ch in data[1:]]

In [10]:
inputs

[10, 3, 1, 7, 5, 4, 8, 7, 2, 4, 8, 7, 9, 0, 7, 8, 3, 1, 7, 6, 4]

In [11]:
targets

[3, 1, 7, 5, 4, 8, 7, 2, 4, 8, 7, 9, 0, 7, 8, 3, 1, 7, 6, 4, 8]

Reshape inputs to have an additional dimension and match the expected input shape for the model.

In [15]:
inputs = torch.tensor(inputs, dtype=torch.long).view(-1,1)
inputs.shape

torch.Size([21, 1])

The inputs tensor is one-hot encoded, turning each index into a binary vector, where all elements are zero except for the one at the position of the index.

In [16]:
inputs = nn.functional.one_hot(inputs, num_classes=len(chars)).float()
inputs.shape

torch.Size([21, 1, 11])

The targets tensor remains as character indices to align with CrossEntropyLoss, which requires class indices as targets.

In [17]:
targets = torch.tensor(targets, dtype=torch.long)
targets.shape

torch.Size([21])

In [29]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)        
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :] 
        out = self.fc(out)
        return out
        
# Initialize the model
rnn_model = RNNModel(len(chars), 16, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn_model.parameters(), lr=0.01)

In [40]:
# Train the model for ten epochs and zero the gradients
for epoch in range(100):
    # switch model to training mode
    rnn_model.train()
    # fit inputs to the model
    outputs = rnn_model(inputs)
    # calculate loss
    loss = criterion(outputs, targets)
    # clear gradients
    optimizer.zero_grad()
    # backpropagation
    loss.backward()
    # weight update
    optimizer.step()

    if (epoch % 10) == 0:
        print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.4756394922733307
Epoch: 11, Loss: 0.4755782186985016
Epoch: 21, Loss: 0.4755208194255829
Epoch: 31, Loss: 0.47546687722206116
Epoch: 41, Loss: 0.4754161834716797
Epoch: 51, Loss: 0.47536832094192505
Epoch: 61, Loss: 0.475323349237442
Epoch: 71, Loss: 0.4752808213233948
Epoch: 81, Loss: 0.4752406179904938
Epoch: 91, Loss: 0.47520264983177185


In [41]:
# Testing model
rnn_model.eval()
test_input = char_to_idx['a']
test_input = nn.functional.one_hot(torch.tensor(test_input).view(-1,1), num_classes=len(chars)).float()

In [45]:
predicted_output = rnn_model(test_input)
predicted_char_idx = torch.argmax(predicted_output, 1).item()
print('Predicted next character: ', idx_to_char[predicted_char_idx])

Predicted next character:  t
