In [19]:
import torch

import torch.nn as nn 

import torch.nn.functional as F

import numpy as np

In [20]:
# Sample corpus

sentences = [
    "deep learning is powerful",
    "deep learning is fun",
    "learning is fun and powerful"
]

In [21]:
# Tokenization

words = list(set(" ".join(sentences).split()))

word_to_idx = {word: idx for idx, word  in enumerate(words)}

idx_to_word = {idx: word for word, idx in word_to_idx.items()}

vocab_size = len(word_to_idx)

In [22]:
# Prepare data (3-word input → 1-word output)

def make_sequences(sentences, context_size=3):

    input_seqs, target_words = [], []

    for sentence in sentences:

        tokens = sentence.split()

        if len(tokens) >= context_size + 1:

            for i in range(len(tokens) - context_size):

                context = tokens[i:context_size]

                target = tokens[i + context_size]

                input_seqs.append(torch.tensor([word_to_idx[w] for w in context]))

                target_words.append(torch.tensor(word_to_idx[target]))

                return input_seqs, target_words
            
input_seqs, target_words = make_sequences(sentences)

In [23]:
# Stacked LSTM model

class StackedLSTM(nn.Module):

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers = 2 ):
        super(StackedLSTM, self).__init__()


        self.embedding = nn.Embedding(vocab_size, embed_dim)

        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers=num_layers)

        self.fc = nn.Linear(hidden_dim, vocab_size)

        self.num_layers = num_layers

        self.hidden_dim = hidden_dim

    

    def forward(self, x, hidden):

        x = self.embedding(x)

        x = x.permute(1, 0, 2)  # [seq_len, batch, embed_dim]

        out, hidden = self.lstm(x, hidden)

        out = self.fc(out[-1])  # last time step

        return out, hidden
    
    def init_hidden(self):

            # Initialize hidden state for all layers

            return (torch.zeros(self.num_layers, 1, self.hidden_dim),
                    torch.zeros(self.num_layers, 1, self.hidden_dim))

In [24]:
# Hyperparams

embedding_dim = 10

hidden_dim = 16

num_layers = 2

model = StackedLSTM(vocab_size, embedding_dim, hidden_dim, num_layers)

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [25]:
# Training

for epoch in range(1, 301):

    total_loss = 0.0

    model.train()

    for inp, target in zip(input_seqs, target_words):

        inp = inp.unsqueeze(0)

        target = target.unsqueeze(0)

        hidden = model.init_hidden()

        optimizer.zero_grad()

        output, _ = model(inp, hidden)

        loss = loss_fn(output, target)

        loss.backward()

        optimizer.step()

        total_loss += loss.item()

    if epoch % 50 == 0:

            print(f"Epoch {epoch}/300, Loss: {total_loss:.4f}")

Epoch 50/300, Loss: 0.0014
Epoch 100/300, Loss: 0.0007
Epoch 150/300, Loss: 0.0005
Epoch 200/300, Loss: 0.0003
Epoch 250/300, Loss: 0.0003
Epoch 300/300, Loss: 0.0002


In [26]:
# Text generation (greedy)

def generate_text(model, seed_words, length=5):

    model.eval()

    generated = seed_words[:]

    hidden = model.init_hidden()

    for _ in range(length):

        input_seq = [word_to_idx[w] for w in generated[-3:]]

        input_tensor = torch.tensor(input_seq).unsqueeze(0)

        output, hidden = model(input_tensor, hidden)

        next_idx = torch.argmax(output, dim=1).item()

        generated.append(idx_to_word[next_idx])

    return " ".join(generated)

In [27]:
# Test generation

print("\nGenerated:")

print(generate_text(model, ['deep', 'learning', 'is']))


Generated:
deep learning is powerful powerful powerful powerful powerful
