In [9]:
import torch

import torch.nn as nn

import torch.nn.functional as F 

import numpy as np

1. Dataset Preparation

In [10]:
# Tiny corpus

corpus = [
    "deep learning is powerful",
    "learning is fun and powerful",
    "deep learning is fun"
]


In [11]:
# Tokenize

tokens = " ".join(corpus).split()

vocab = sorted(set(tokens))

word_to_idx = {word: i for i, word in enumerate(vocab)}

idx_to_word = {i: word for word, i in word_to_idx.items()}

In [12]:
# Convert to sequences

seq_len = 3

data = []

In [13]:
for sentence in corpus:

    words = sentence.split()

    for i in range(len(words) - seq_len):

        input_seq = words[i: i + seq_len]

        target_word = words[i+seq_len]

        input_idx = [word_to_idx[word] for word in input_seq]

        target_idx = word_to_idx[target_word]

        data.append((input_idx, target_idx))


input_seqs = [torch.tensor(x) for x , _ in data]

target_words = [torch.tensor(y) for y ,_ in data]

2. Define LSTM Model

In [14]:
class WordLSTM(nn.Module):

    def __init__(self, vocab_size,embedding_dim, hidden_dim):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)

        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):

        x = self.embedding(x) # [batch, seq, embed]

        output, hidden = self.lstm(x, hidden) # [batch, seq, hidden]

        output = self.fc(output[:,-1,:]) # take last time step

        return output, hidden

3. Training Loop

In [15]:
emebedding_dim = 10

hidden_dim = 32

vocab_size = len(vocab)

learning_rate = 0.01

num_epochs = 300

In [16]:
model = WordLSTM(vocab_size, emebedding_dim, hidden_dim)

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


for epoch in range(1, num_epochs + 1):

    total_loss = 0

    for input_tensor,target_tensor in zip(input_seqs,target_words):

        input_tensor = input_tensor.unsqueeze(0)

        target_tensor = target_tensor.unsqueeze(0)


        optimizer.zero_grad()

        output, _ = model(input_tensor)

        loss = loss_fn(output, target_tensor)

        loss.backward()

        optimizer.step()

        total_loss += loss.item()

        if epoch % 50 == 0:
                
                print(f"Epoch {epoch}/{num_epochs}, Loss: {total_loss:.4f}")

RuntimeError: 0D or 1D target tensor expected, multi-target not supported

4. Text Generation (Sampling with Temperature)

In [None]:
def sample_from_probs(probs, temperature=1.0):
    probs = torch.softmax(probs / temperature, dim=-1).detach().cpu().numpy()
    return np.random.choice(len(probs), p=probs)

def generate_text(model, seed_words, length=6, temperature=1.0):
    model.eval()
    words = seed_words[:]
    hidden = None

    for _ in range(length):
        input_seq = [word_to_idx[w] for w in words[-3:]]
        input_tensor = torch.tensor(input_seq).unsqueeze(0)
        with torch.no_grad():
            output, hidden = model(input_tensor, hidden)
        next_idx = sample_from_probs(output[0], temperature)
        next_word = idx_to_word[next_idx]
        words.append(next_word)
    return " ".join(words)


In [None]:
seed = ["deep", "learning", "is"]
print("\nGenerated Text (temp=1.0):")
print(generate_text(model, seed_words=seed, length=6, temperature=1.0))

print("\nGenerated Text (temp=0.5):")
print(generate_text(model, seed_words=seed, length=6, temperature=0.5))

print("\nGenerated Text (temp=1.5):")
print(generate_text(model, seed_words=seed, length=6, temperature=1.5))