<a href="https://colab.research.google.com/github/vijaytamilselvan/neural-net-from-scratch/blob/main/Trigram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

# Toy corpus the model is trained on
words = ["hello", "hi", "how", "are", "you", "doing", "today"]

# Character vocabulary: each distinct letter of the corpus gets an id >= 1
# (in sorted order); id 0 is reserved for '.', the start/end-of-word marker.
chars = sorted(set("".join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
# Reverse lookup (id -> character), used when decoding sampled ids
itos = dict((index, symbol) for symbol, index in stoi.items())
vocab_size = len(stoi)

# Prepare dataset
def build_dataset(words, context_size=2, vocab=None):
    """Turn words into (context, next-character) training pairs.

    Each word is scanned left to right with a sliding window of
    ``context_size`` character indices. The window starts filled with
    index 0 (the id this file assigns to the '.' start/end token) and a
    trailing '.' is appended to every word so the end of the word is itself
    a prediction target.

    Args:
        words: Iterable of strings to encode.
        context_size: Number of preceding characters used as context.
            Defaults to 2, matching the trigram setup used in this file.
        vocab: Optional character -> index mapping. When omitted, the
            module-level ``stoi`` mapping is used.

    Returns:
        A tuple ``(X, Y)`` of int64 tensors: ``X`` has shape
        ``(N, context_size)`` and holds the contexts, ``Y`` has shape
        ``(N,)`` and holds the index of the character following each one.

    Raises:
        ValueError: If ``context_size`` is smaller than 1.
        KeyError: If a character is missing from the mapping.
    """
    if context_size < 1:
        raise ValueError("context_size must be at least 1")
    char_to_index = stoi if vocab is None else vocab

    contexts, targets = [], []
    for word in words:
        window = [0] * context_size  # start-of-word padding
        for ch in word + '.':
            ix = char_to_index[ch]
            contexts.append(window)
            targets.append(ix)
            window = window[1:] + [ix]  # slide: drop oldest, append newest

    # Pin dtype and shape explicitly so an empty `words` still produces an
    # int64 (0, context_size) tensor instead of a float tensor of shape (0,).
    X = torch.tensor(contexts, dtype=torch.long).reshape(len(contexts), context_size)
    Y = torch.tensor(targets, dtype=torch.long)
    return X, Y

# Build the training tensors once for the whole corpus: X holds the
# two-index contexts, Y the index of the character that follows each context.
X, Y = build_dataset(words)

# Define model
class TrigramModel(nn.Module):
    """Small MLP that scores the next character given the two before it.

    Both context indices are embedded, the two embeddings are concatenated,
    and the result goes through one tanh hidden layer to produce one logit
    per vocabulary entry.

    Args:
        vocab_size: Number of distinct tokens (embedding rows and output logits).
        embedding_dim: Size of each character embedding.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, vocab_size, embedding_dim=10, hidden_dim=100):
        super().__init__()
        # Two embedded characters are laid side by side before the first layer
        concat_width = embedding_dim * 2
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.fc1 = nn.Linear(concat_width, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        """Return unnormalized next-character scores for a batch of contexts.

        ``x`` holds token indices; callers in this file pass shape (batch, 2).
        The result has one row per batch entry and ``vocab_size`` columns.
        """
        embedded = self.embedding(x)  # (batch, 2, embedding_dim)
        # Collapse everything after the batch axis into one feature vector
        flat = embedded.view(embedded.size(0), -1)
        hidden = self.fc1(flat).tanh()
        return self.fc2(hidden)

# Network and optimizer (Adam, learning rate 0.01)
model = TrigramModel(vocab_size)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Full-batch training: every epoch runs one forward/backward pass over the
# entire (X, Y) dataset followed by a single optimizer step.
for epoch in range(200):
    # Clear gradients accumulated by the previous step before computing new ones
    optimizer.zero_grad()

    logits = model(X)
    loss = F.cross_entropy(logits, Y)
    loss.backward()
    optimizer.step()

    # Report progress on every 20th epoch, starting with epoch 0
    if not epoch % 20:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# Generation
def generate_words(n=5, max_len=100):
    """Sample ``n`` words from the module-level ``model`` and print each one.

    Every word starts from the context ``[0, 0]`` (two '.' start tokens).
    Characters are drawn one at a time from the softmax of the model's
    logits; a word ends when index 0 (the '.' end token) is sampled.

    Args:
        n: Number of words to generate and print.
        max_len: Upper bound on the number of characters per word, so that a
            model which rarely emits the end token cannot keep the loop
            running indefinitely. Pass ``None` to sample without a bound.

    Returns:
        None. Each generated word is printed on its own line.
    """
    # Sampling needs no gradients; disabling autograd avoids building a
    # computation graph on every forward pass.
    with torch.no_grad():
        for _ in range(n):
            out = []
            context = [0, 0]
            while max_len is None or len(out) < max_len:
                x = torch.tensor([context])  # batch containing one context
                logits = model(x)
                probs = F.softmax(logits, dim=1)
                ix = torch.multinomial(probs, num_samples=1).item()
                if ix == 0:  # end-of-word token
                    break
                out.append(itos[ix])
                context = context[1:] + [ix]  # slide the two-character window
            print(''.join(out))

# Print ten sampled words as a quick qualitative check of the trained model
generate_words(10)


Epoch 0, Loss: 2.8506
Epoch 20, Loss: 0.4566
Epoch 40, Loss: 0.4162
Epoch 60, Loss: 0.4142
Epoch 80, Loss: 0.4138
Epoch 100, Loss: 0.4136
Epoch 120, Loss: 0.4135
Epoch 140, Loss: 0.4134
Epoch 160, Loss: 0.4133
Epoch 180, Loss: 0.4132
you
hello
hello
doing
are
are
how
doing
are
hello
