<a href="https://colab.research.google.com/github/yashraj01-se/NLP-with-Deep-Learning/blob/main/n_gram(practical).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import

In [1]:
import torch
from torch import nn

#Data and Preprocessing

In [7]:
#2.Vocabulary:
corpous="""Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse,
and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world.
It is a way I have of driving off the spleen and regulating the circulation.
Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul;
whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet;
and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me
from deliberately stepping into the street, and methodically knocking people’s hats off—then, I account it high time
to get to sea as soon as I can.
"""
words=corpous.split()
vocab=list(set(words))
vocab_size=len(vocab)
word2idx={w:i for i,w in enumerate(vocab)}
idx2word={i:w for w,i in word2idx.items()}

In [13]:
#3.Function defining pair for n gram:
pairs=[]
def pair_maker(text,n):
  for i in range(len(text)-n+1):
    context=text[i:i+n-1]
    target=text[i+n-1]
    pairs.append(([word2idx[w] for w in context],word2idx[target]))

pair_maker(vocab,3)
print(pairs)

[([0, 1], 2), ([1, 2], 3), ([2, 3], 4), ([3, 4], 5), ([4, 5], 6), ([5, 6], 7), ([6, 7], 8), ([7, 8], 9), ([8, 9], 10), ([9, 10], 11), ([10, 11], 12), ([11, 12], 13), ([12, 13], 14), ([13, 14], 15), ([14, 15], 16), ([15, 16], 17), ([16, 17], 18), ([17, 18], 19), ([18, 19], 20), ([19, 20], 21), ([20, 21], 22), ([21, 22], 23), ([22, 23], 24), ([23, 24], 25), ([24, 25], 26), ([25, 26], 27), ([26, 27], 28), ([27, 28], 29), ([28, 29], 30), ([29, 30], 31), ([30, 31], 32), ([31, 32], 33), ([32, 33], 34), ([33, 34], 35), ([34, 35], 36), ([35, 36], 37), ([36, 37], 38), ([37, 38], 39), ([38, 39], 40), ([39, 40], 41), ([40, 41], 42), ([41, 42], 43), ([42, 43], 44), ([43, 44], 45), ([44, 45], 46), ([45, 46], 47), ([46, 47], 48), ([47, 48], 49), ([48, 49], 50), ([49, 50], 51), ([50, 51], 52), ([51, 52], 53), ([52, 53], 54), ([53, 54], 55), ([54, 55], 56), ([55, 56], 57), ([56, 57], 58), ([57, 58], 59), ([58, 59], 60), ([59, 60], 61), ([60, 61], 62), ([61, 62], 63), ([62, 63], 64), ([63, 64], 65), ([

In [18]:
# ---- 2. Parameters ----
embed_size=10
hidden_size=10
n=3
W_embed = torch.randn(vocab_size, embed_size, requires_grad=True)
W_hidden = torch.randn((n-1)*embed_size, hidden_size, requires_grad=True)
b_hidden = torch.zeros(hidden_size, requires_grad=True)
W_out = torch.randn(hidden_size, vocab_size, requires_grad=True)
b_out = torch.zeros(vocab_size, requires_grad=True)

# ---- 3. Training ----
loss_fn=nn.CrossEntropyLoss()
optimizer = torch.optim.SGD([W_embed, W_hidden, b_hidden, W_out, b_out], lr=0.05)
epochs = 200

for epoch in range(epochs):
    total_loss = 0
    for context_idxs, target_idx in pairs:
        # ---- Forward ----
        embeds = W_embed[context_idxs].view(1, -1)  # flatten context embeddings
        h = torch.tanh(embeds @ W_hidden + b_hidden)  # hidden layer
        logits = h @ W_out + b_out                   # output layer
        loss = loss_fn(logits,torch.tensor([target_idx]))

        # ---- Backward ----
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    if (epoch+1) % 20 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

# ---- 4. Evaluation ----
print("\nN-gram Predictions:")
for context_idxs, _ in pairs:
    context_words = [idx2word[i] for i in context_idxs]
    embeds = W_embed[context_idxs].view(1, -1)
    h = torch.tanh(embeds @ W_hidden + b_hidden)
    logits = h @ W_out + b_out
    probs = torch.softmax(logits, dim=1)
    top_idx = torch.argmax(probs, dim=1).item()
    predicted_word = idx2word[top_idx]
    print(f"Context: {context_words} -> Predicted: {predicted_word}")


Epoch 20/200, Loss: 100.2392
Epoch 40/200, Loss: 41.8500
Epoch 60/200, Loss: 26.0132
Epoch 80/200, Loss: 17.8003
Epoch 100/200, Loss: 12.5954
Epoch 120/200, Loss: 9.7913
Epoch 140/200, Loss: 8.0767
Epoch 160/200, Loss: 6.8754
Epoch 180/200, Loss: 5.9983
Epoch 200/200, Loss: 5.3364

N-gram Predictions:
Context: ['regulating', 'off—then,'] -> Predicted: way
Context: ['off—then,', 'way'] -> Predicted: precisely—having
Context: ['way', 'precisely—having'] -> Predicted: to
Context: ['precisely—having', 'to'] -> Predicted: from
Context: ['to', 'from'] -> Predicted: particular
Context: ['from', 'particular'] -> Predicted: that
Context: ['particular', 'that'] -> Predicted: account
Context: ['that', 'account'] -> Predicted: little
Context: ['account', 'little'] -> Predicted: high
Context: ['little', 'high'] -> Predicted: would
Context: ['high', 'would'] -> Predicted: part
Context: ['would', 'part'] -> Predicted: people’s
Context: ['part', 'people’s'] -> Predicted: Call
Context: ['people’s', 'Ca