# Recurrent Neural Network (RNN)

A recurrent neural network (RNN) is a type of artificial neural network designed for processing sequential data, such as time series, natural language, or speech. Unlike traditional feedforward neural networks, RNNs have a "memory": a hidden state that is fed back into the network at every time step, carrying information from previous inputs forward. This makes them well suited to tasks where context or order matters. RNNs predate Transformers and have been widely used for text generation, speech recognition, and time series forecasting (e.g., stock price forecasting).

##  Toy RNN Example

This simplified example trains an RNN to predict the next character in the word "hello".
1. **Model Definition**: 
   - `nn.RNN` handles the recurrent computation.
   - A fully connected layer (`fc`) maps the hidden state to the output (character predictions).
2. **Data**: 
   - We use "hell" as input and expect "ello" as output (shifting the sequence).
   - Characters are converted to one-hot vectors (e.g., 'h' → [1, 0, 0, 0]).
3. **Training**: 
   - The model learns by minimizing the cross-entropy loss between predicted and target characters.
4. **Prediction**: 
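   - After training, the input "hell" is fed through the model again and the highest-scoring character at each time step is taken as the predicted next character.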

In [None]:
import torch
import torch.nn as nn

class SimpleRNN(nn.Module):
    """Recurrent layer (``nn.RNN``) followed by a linear read-out per time step.

    Inputs are batch-first: ``x`` has shape ``[batch, seq_len, input_size]``.

    Args:
        input_size: Number of features in each input time step.
        hidden_size: Number of features in the recurrent hidden state.
        output_size: Number of features produced for each time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Run the sequence through the RNN and project every step.

        Args:
            x: Input tensor of shape ``[batch, seq_len, input_size]``.
            hidden: Optional initial hidden state of shape
                ``[num_layers, batch, hidden_size]``. When ``None``,
                ``nn.RNN`` starts from an all-zero state.

        Returns:
            A ``(out, hidden)`` tuple: ``out`` has shape
            ``[batch, seq_len, output_size]`` and ``hidden`` is the final
            hidden state returned by the recurrent layer.
        """
        out, hidden = self.rnn(x, hidden)
        return self.fc(out), hidden

    def init_hidden(self, batch_size):
        """Return an all-zero initial hidden state for ``batch_size`` sequences.

        The tensor is created with the same dtype and device as the model's
        parameters, so it stays valid after ``model.to(...)`` or
        ``model.double()``.
        """
        return self.fc.weight.new_zeros(
            self.rnn.num_layers, batch_size, self.hidden_size
        )

# Character vocabulary: the four distinct letters of "hello"
chars = list("helo")
char_to_idx = dict(zip(chars, range(len(chars))))
idx_to_char = dict(enumerate(chars))

# Hyperparameters
input_size = 4            # one-hot width: one slot per unique character
output_size = input_size  # one score per character in the vocabulary
hidden_size = 8           # size of the hidden state
learning_rate = 0.01

# Training pair: the target is the input shifted left by one character
input_seq, target_seq = "hell", "ello"
# Convert to one-hot encoding with explicit batch dimension
def to_one_hot(seq):
    """One-hot encode ``seq`` as a tensor of shape ``[1, len(seq), input_size]``.

    Uses the module-level ``char_to_idx`` mapping and ``input_size`` width;
    the leading dimension is a batch of size 1.
    """
    columns = torch.tensor([char_to_idx[symbol] for symbol in seq], dtype=torch.long)
    rows = torch.arange(len(seq))
    encoded = torch.zeros(1, len(seq), input_size)
    # Set one entry per time step: position (0, t, index of the t-th character).
    encoded[0, rows, columns] = 1
    return encoded

# Encode the input as one-hot vectors; targets stay as class indices,
# which is the form nn.CrossEntropyLoss is given in the training loop.
input_tensor = to_one_hot(input_seq)  # Shape: [1, 4, 4]
print("Input tensor shape:", input_tensor.shape)
target_tensor = torch.tensor(
    tuple(char_to_idx[ch] for ch in target_seq),
    dtype=torch.long,
)  # Shape: [4]

# Build the network, the loss function, and the optimizer
model = SimpleRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: 100 gradient steps over the single "hell" -> "ello" example
for epoch in range(100):
    optimizer.zero_grad()

    # Start every pass from a fresh zero hidden state (batch size = 1)
    hidden = model.init_hidden(1)
    print("Hidden state shape:", hidden.shape)  # Should be [1, 1, 8]

    output, hidden = model(input_tensor, hidden)  # output: [1, 4, 4], hidden: [1, 1, 8]

    # Drop the batch dimension so the scores are [4, 4] against targets of shape [4]
    loss = criterion(output[0], target_tensor)
    loss.backward()
    optimizer.step()

    # Report progress every 20th epoch
    if not epoch % 20:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

# Test the model: run the training input once more without tracking gradients
with torch.no_grad():
    output, hidden = model(input_tensor, model.init_hidden(1))
    # Highest-scoring character index at each time step
    predicted_idx = output.argmax(dim=-1).squeeze().tolist()
    predicted_chars = ''.join(idx_to_char[idx] for idx in predicted_idx)

print(f"Input: {input_seq}")
print(f"Predicted: {predicted_chars}")

Input tensor shape: torch.Size([1, 4, 4])
Hidden state shape: torch.Size([1, 1, 8])
Epoch 0, Loss: 1.2936
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Hidden state shape: torch.Size([1, 1, 8])
Epoch 20, Loss: 0.6442
Hidden state shape: torch.Size(