In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import string

# Read the corpus and fold it to lower case
with open("hamlet.txt", "r", encoding="utf-8") as f:
    text = f.read().lower()

# Discard every character that is not in string.printable
text = ''.join(ch for ch in text if ch in string.printable)
chars = sorted(set(text))
vocab_size = len(chars)
print(f"Unique characters: {vocab_size}")

# Lookup tables between characters and their integer ids
# (ids follow the sorted order of `chars`)
idx2char = dict(enumerate(chars))
char2idx = {ch: i for i, ch in idx2char.items()}

# Integer-encode the whole corpus, one id per character
encoded_text = list(map(char2idx.__getitem__, text))

# Hyperparameters
SEQ_LENGTH = 100    # characters per training window
BATCH_SIZE = 64     # windows per optimisation step
HIDDEN_SIZE = 256   # width of the recurrent hidden state
EMBED_DIM = 128     # width of the character embedding
EPOCHS = 10         # full passes over the dataset
LR = 0.002          # Adam learning rate

# Reproducibility: weight initialisation, DataLoader shuffling and multinomial
# sampling all draw from torch's global RNG, so seed it once up front.
# NOTE: this does not by itself guarantee bit-identical results on CUDA.
SEED = 42
torch.manual_seed(SEED)

# Dataset preparation
class TextDataset(Dataset):
    """Sliding-window next-character dataset over an integer-encoded corpus.

    Item ``i`` is the pair ``(data[i : i + seq_len], data[i + 1 : i + seq_len + 1])``,
    i.e. an input window and the same window shifted one position to the right.

    Args:
        data: Sequence of integer token ids (list, array or 1-D tensor).
        seq_len: Number of tokens in every input/target window.
    """

    def __init__(self, data, seq_len):
        # Convert once here instead of re-building tensors from list slices
        # on every __getitem__ call.
        self.data = torch.as_tensor(data, dtype=torch.long)
        self.seq_len = seq_len

    def __len__(self):
        """Number of complete windows; 0 when the corpus is shorter than ``seq_len``."""
        # Clamp at zero: a negative value would make len() raise ValueError.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        """Return the ``(seq, target)`` pair of ``torch.long`` tensors for window ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n = len(self)
        if idx < 0:
            idx = idx + n
        # Raising IndexError is what terminates plain ``for item in dataset``
        # iteration; without it the loop never ends and out-of-range indices
        # silently yield truncated or empty windows.
        if not 0 <= idx < n:
            raise IndexError(f"index {idx} out of range for dataset of length {n}")
        # clone() so callers get independent tensors rather than views of the corpus.
        seq = self.data[idx:idx + self.seq_len].clone()
        target = self.data[idx + 1:idx + self.seq_len + 1].clone()
        return seq, target

# Windowed view of the encoded corpus, served in shuffled mini-batches
dataset = TextDataset(data=encoded_text, seq_len=SEQ_LENGTH)
dataloader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True)

# LSTM Model
class CharLSTM(nn.Module):
    """Character-level language model: embedding -> single-layer LSTM -> linear head."""

    def __init__(self, vocab_size, embed_dim, hidden_size):
        """Build the three layers.

        Args:
            vocab_size: Number of distinct token ids (embedding rows and output logits).
            embed_dim: Size of each embedding vector.
            hidden_size: Size of the LSTM hidden state.
        """
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine the
        # state_dict keys and the order in which parameters are initialised.
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden=None):
        """Run a batch of token ids through the network.

        Args:
            x: Integer token ids, batch-first (the LSTM is built with ``batch_first=True``).
            hidden: Optional recurrent state passed straight to ``nn.LSTM``.

        Returns:
            ``(logits, hidden)``: per-position vocabulary logits and the
            recurrent state returned by the LSTM.
        """
        embedded = self.embedding(x)
        features, next_hidden = self.lstm(embedded, hidden)
        return self.fc(features), next_hidden

# GRU Model
class CharGRU(nn.Module):
    """Character-level language model: embedding -> single-layer GRU -> linear head."""

    def __init__(self, vocab_size, embed_dim, hidden_size):
        """Build the three layers.

        Args:
            vocab_size: Number of distinct token ids (embedding rows and output logits).
            embed_dim: Size of each embedding vector.
            hidden_size: Size of the GRU hidden state.
        """
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine the
        # state_dict keys and the order in which parameters are initialised.
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.gru = nn.GRU(embed_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden=None):
        """Run a batch of token ids through the network.

        Args:
            x: Integer token ids, batch-first (the GRU is built with ``batch_first=True``).
            hidden: Optional recurrent state passed straight to ``nn.GRU``.

        Returns:
            ``(logits, hidden)``: per-position vocabulary logits and the
            recurrent state returned by the GRU.
        """
        embedded = self.embedding(x)
        features, next_hidden = self.gru(embedded, hidden)
        return self.fc(features), next_hidden

# Training function
def train(model, name, loader=None, epochs=None, lr=None):
    """Train ``model`` for next-token prediction and print the mean loss per epoch.

    Args:
        model: Module whose forward returns ``(logits, hidden)`` with the
            vocabulary dimension last in ``logits``.
        name: Label used as the prefix of every progress line.
        loader: Iterable of ``(inputs, targets)`` batches. Defaults to the
            notebook-level ``dataloader``.
        epochs: Number of passes over ``loader``. Defaults to ``EPOCHS``.
        lr: Adam learning rate. Defaults to ``LR``.

    Returns:
        None. The model is updated in place and moved to CUDA when available,
        otherwise CPU.
    """
    # Resolve defaults at call time so the notebook-level settings are only
    # touched when the caller did not supply their own.
    if loader is None:
        loader = dataloader
    if epochs is None:
        epochs = EPOCHS
    if lr is None:
        lr = LR

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # generate_text() switches the model to eval mode; make sure a subsequent
    # training run is actually in training mode.
    model.train()

    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for x_batch, y_batch in loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            output, _ = model(x_batch)
            # Flatten positions; take the class count from the logits themselves
            # rather than from a global so any vocabulary size works.
            loss = criterion(output.reshape(-1, output.size(-1)), y_batch.reshape(-1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # An empty loader reports NaN instead of raising ZeroDivisionError.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        print(f"[{name}] Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}")



Unique characters: 44


In [2]:
import pickle

# Pickle the character list and both lookup tables into vocab.pkl
vocab_bundle = (chars, char2idx, idx2char)
with open("vocab.pkl", "wb") as f:
    pickle.dump(vocab_bundle, f)


In [3]:
# Train LSTM
lstm_model = CharLSTM(vocab_size=vocab_size, embed_dim=EMBED_DIM, hidden_size=HIDDEN_SIZE)
train(model=lstm_model, name="LSTM")


[LSTM] Epoch 1/10, Loss: 1.1616
[LSTM] Epoch 2/10, Loss: 0.5989
[LSTM] Epoch 3/10, Loss: 0.4568
[LSTM] Epoch 4/10, Loss: 0.3981
[LSTM] Epoch 5/10, Loss: 0.3670
[LSTM] Epoch 6/10, Loss: 0.3475
[LSTM] Epoch 7/10, Loss: 0.3334
[LSTM] Epoch 8/10, Loss: 0.3221
[LSTM] Epoch 9/10, Loss: 0.3136
[LSTM] Epoch 10/10, Loss: 0.3069


In [4]:

# Train GRU
gru_model = CharGRU(vocab_size=vocab_size, embed_dim=EMBED_DIM, hidden_size=HIDDEN_SIZE)
train(model=gru_model, name="GRU")

[GRU] Epoch 1/10, Loss: 1.0817
[GRU] Epoch 2/10, Loss: 0.6991
[GRU] Epoch 3/10, Loss: 0.6142
[GRU] Epoch 4/10, Loss: 0.5670
[GRU] Epoch 5/10, Loss: 0.5351
[GRU] Epoch 6/10, Loss: 0.5124
[GRU] Epoch 7/10, Loss: 0.4959
[GRU] Epoch 8/10, Loss: 0.4810
[GRU] Epoch 9/10, Loss: 0.4696
[GRU] Epoch 10/10, Loss: 0.4604


In [5]:
def generate_text(model, seed_text, length=200):
    """Sample ``length`` characters from ``model``, continuing ``seed_text``.

    The model is primed once with the (lower-cased) seed, truncated to its last
    ``SEQ_LENGTH`` characters; afterwards only the newest sampled character is
    fed in, together with the recurrent state returned by the previous step.

    Args:
        model: Module whose forward takes ``(token_ids, hidden)`` and returns
            ``(logits, hidden)``.
        seed_text: Text to continue. Characters missing from ``char2idx`` are
            mapped to index 0.
        length: Number of characters to sample.

    Returns:
        ``seed_text`` (original casing) followed by the sampled characters.

    Raises:
        ValueError: If ``seed_text`` is empty while ``length`` is positive,
            since there is nothing to condition the first step on.

    Side effects:
        Leaves ``model`` in eval mode.
    """
    model.eval()
    if length <= 0:
        return seed_text
    if not seed_text:
        raise ValueError("seed_text must contain at least one character")

    device = next(model.parameters()).device
    seed_ids = [char2idx.get(c, 0) for c in seed_text.lower()]
    # First step: the seed window, batch dimension of 1, no prior state.
    step_input = torch.tensor(seed_ids, dtype=torch.long, device=device).unsqueeze(0)[:, -SEQ_LENGTH:]
    hidden = None
    generated = []

    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(step_input, hidden)
            # Distribution over the next character, taken from the last position.
            probs = torch.softmax(output[:, -1, :], dim=-1)
            predicted_idx = torch.multinomial(probs, num_samples=1).item()
            generated.append(idx2char[predicted_idx])
            # `hidden` already summarises everything seen so far, so feed only
            # the new character; re-feeding the whole window alongside the
            # carried state would process the same context again.
            step_input = torch.tensor([[predicted_idx]], dtype=torch.long, device=device)

    return seed_text + "".join(generated)


In [6]:
# Draw one 300-character sample from each trained model
lstm_sample = generate_text(lstm_model, "to be, or not to be", length=300)
print(lstm_sample)
gru_sample = generate_text(gru_model, "hamlet", length=300)
print(gru_sample)

to be, or not to be a flowers are our king,
to sleepe the consonanca. who is that they knew withall

   hora. he did loue, my lord?
  ham. oh wonderfull!
  hor. good frame the water habite of encounter, a better. mother, man deare this infected,
thy loues, friends to his mothers must be so,
hamlet, my noble father sel
hamlet. but if you now speake: but good euen sir

   ham. from him. 'tis very shall
not reuerfections in his paintment with his life, a puffes in his
reuionings dismantled watchment, did out
the lings and polition of death, a very well akenstand knees, and all that we haue seene
from my sword, that is not


In [7]:
# Checkpoint file names for the two trained networks
lstm_save_path, gru_save_path = "lstm_model.pth", "gru_model.pth"

# Write each model's state_dict (parameters/buffers), not the module object itself
lstm_state = lstm_model.state_dict()
torch.save(lstm_state, lstm_save_path)
gru_state = gru_model.state_dict()
torch.save(gru_state, gru_save_path)