# Shakespear Char prediction using GRU

# 1. Import libraries needed

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

# 2. Load data from file, create a char to index map and an index to char map

In [2]:
# -------------------------------
# Load data
# -------------------------------
df = pd.read_csv('data/Shakespeare_data.csv')
data = ' '.join(df['PlayerLine'].astype(str))

chars = sorted(list(set(data)))
vocab_size = len(chars)

char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# 3. Create a GRU class for Char

In [3]:
# -------------------------------
# Model definition
# -------------------------------
class CharGRU(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embed = nn.Embedding(vocab_size, vocab_size)  # one-hot embedding
        self.gru = nn.GRU(vocab_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden=None):
        x = self.embed(x)  # [batch, seq, vocab_size]
        out, hidden = self.gru(x, hidden)
        out = self.fc(out)  # [batch, seq, vocab_size]
        return out, hidden

# -------------------------------
# Hyperparameters
# -------------------------------
hidden_size = vocab_size
seq_length = 25
lr = 1e-2
num_iters = 10000

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CharGRU(vocab_size, hidden_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# 4. Train the GRU using data loaded and generated Shakespear style text during training to see how the model performs during training

In [4]:
def get_batch(p, seq_length):
    inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).unsqueeze(0)
    targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).unsqueeze(0)
    return inputs.to(device), targets.to(device)


def sample(model, start_ix, length=200):
    model.eval()
    ixes = [start_ix]
    input = torch.tensor([[start_ix]], dtype=torch.long).to(device)
    hidden = None
    for _ in range(length):
        output, hidden = model(input, hidden)
        probs = torch.softmax(output[:, -1, :], dim=-1).detach().cpu().numpy().ravel()
        ix = torch.multinomial(torch.tensor(probs), 1).item()
        ixes.append(ix)
        input = torch.tensor([[ix]], dtype=torch.long).to(device)
    return ''.join(ix_to_char[i] for i in ixes)

# -------------------------------
# Helper functions
# -------------------------------
def get_batch(p, seq_length):
    inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).unsqueeze(0)
    targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).unsqueeze(0)
    return inputs.to(device), targets.to(device)


def sample(model, start_ix, length=200):
    model.eval()
    ixes = [start_ix]
    input = torch.tensor([[start_ix]], dtype=torch.long).to(device)
    hidden = None
    for _ in range(length):
        output, hidden = model(input, hidden)
        probs = torch.softmax(output[:, -1, :], dim=-1).detach().cpu().numpy().ravel()
        ix = torch.multinomial(torch.tensor(probs), 1).item()
        ixes.append(ix)
        input = torch.tensor([[ix]], dtype=torch.long).to(device)
    return ''.join(ix_to_char[i] for i in ixes)


# -------------------------------
# Training loop
# -------------------------------
p = 0
for n in range(num_iters):
    if p + seq_length + 1 >= len(data):
        p = 0

    inputs, targets = get_batch(p, seq_length)

    optimizer.zero_grad()
    outputs, _ = model(inputs)

    loss = criterion(outputs.squeeze(0), targets.squeeze(0))
    loss.backward()
    optimizer.step()

    if n % 1000 == 0:
        print(f"Iter {n}, Loss {loss.item():.4f}")
        sample_ix = sample(model, inputs[0, 0].item(), 200)
        print("----\n" + sample_ix + "\n----")
        model.train()  # <--- ADD THIS

    p += seq_length

Iter 0, Loss 4.2934
----
Awy8XbRGJ[KBG$YCL,y7NJA8.pm-lKu57eerlzTy$k?u2]JEnYoSpe0a8InGgnHflYM.oGE6'pdOVFp,EhdJbi1tVTf.5,!]W.2OV'RkJdQakX9[IhLMowUh?X98G!w]I]m:l?4TBsw3xRA$y02]99[)xkBiILEeeTJB.	G'bZor.tiDfLq3c II.d85SnqyfDH5-Xqr8
----
Iter 1000, Loss 2.7859
----
untime, Whe yor you cmammy, I day stlals nortuntne. Yous doind yoortelfsh I divis held grue to gcord stit if On is do,, truee yoncesur, I wagl hal, Not to daSs sciis Detoll ut bak dond sco ul dour cizp
----
Iter 2000, Loss 2.1692
----
t tes no ti. But in am in even, heven no seve hon! 'whe croleking four in frts le elon wite heis coldar me il ame hele ceirs fare for in theim four kintrink ens in? pest thee and in buck? alde me mas i
----
Iter 3000, Loss 1.8237
----
h too dooth! A ave sandy a sucwlnberoothiald me sit arty inrots I'llroth-e, a sook, and In piret me winks your trueming and sitt't shet and the if, and 'to no thist tiven Ann your eave shearthe, ye of 
----
Iter 4000, Loss 3.7332
----
Ey ber whore ant sopin thelet't tye

# 5. Conclusion