In [None]:
import numpy as np
import torch
from torch import nn
from torch import optim

sentence = "The quick brown fox jumped over the lazy dog"

# Vocabulary: the distinct lower-cased words of the sentence, sorted so that
# each word gets the same index on every run (set iteration order is not
# something to rely on).
unique_words = sorted({word.lower() for word in sentence.split()})
length = len(unique_words)

# Lookup tables in both directions: word -> row index and row index -> word.
word_to_index = {word: i for i, word in enumerate(unique_words)}
index_to_word = dict(enumerate(unique_words))

# One-hot encoding: row i of the identity matrix is the vector for word i
# (float64, all zeros except a single 1.0 in column i).
vectors = np.eye(length)
print(f"Vocabulary size: {length}")
print(vectors)


Vocabulary size: 8
[[1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]]


In [2]:
words = [word.lower() for word in sentence.split()]

# Training pairs: slide a three-word window over the sentence. The first two
# words of each window form the context ("x") and the third is the word to
# predict ("y"). Every pair is derived from `words`, so the training data
# cannot drift out of sync with `sentence` if the sentence is edited.
# A sentence with fewer than three words yields an empty list.
completions = [
    {"x": f"{first} {second}", "y": target}
    for first, second, target in zip(words, words[1:], words[2:])
]

def get_input_tensor(text):
    """Encode a whitespace-separated context string as one network input row.

    The text is lower-cased and split on whitespace; each word is looked up
    in ``word_to_index`` and its row of ``vectors`` is appended, in order, to
    a single flat vector. For the two-word contexts and 8-word vocabulary used
    in this notebook that vector has 16 entries.

    Returns:
        A ``float32`` tensor with a leading batch dimension of size 1.

    Raises:
        KeyError: if a word is not a key of ``word_to_index``.
    """
    flattened = [
        component
        for token in text.lower().split()
        for component in vectors[word_to_index[token]]
    ]
    encoded = torch.tensor(flattened, dtype=torch.float32)
    # The network consumes batches, so wrap the single example as a batch of one.
    return encoded.unsqueeze(0)

def get_target_index_tensor(target_word):
    """Return the class index of ``target_word`` as a one-element long tensor.

    The word is lower-cased before being looked up in ``word_to_index``.
    The result is a class index rather than a one-hot vector because that is
    the target format ``nn.CrossEntropyLoss`` takes here.

    Raises:
        KeyError: if the lower-cased word is not a key of ``word_to_index``.
    """
    class_id = word_to_index[target_word.lower()]
    targets = [class_id]  # batch of one
    return torch.tensor(targets, dtype=torch.long)

# Seed the RNG so that weight initialisation, and therefore the printed losses
# and the predictions made later, are identical on every Restart & Run All.
# NOTE(review): the seed value is arbitrary; confirm the loss still converges
# with it, and pick another value if it lands on a poor initialisation.
torch.manual_seed(0)

# Layer sizes are tied to the data rather than hard-coded: the input is the
# concatenation of `context_size` one-hot word vectors, and the output has one
# logit per vocabulary word.
context_size = 2  # number of context words in each completion's "x"
hidden_size = 8   # width of every hidden layer

network = nn.Sequential(
    nn.Linear(context_size * length, hidden_size),  # input layer
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),  # hidden layer 1
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),  # hidden layer 2
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),  # hidden layer 3
    nn.ReLU(),
    nn.Linear(hidden_size, length),  # output layer: raw logits, one per word
)

# CrossEntropyLoss takes raw logits plus a class index, so the network has no
# final softmax layer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(network.parameters(), lr=0.01)

# The training examples never change, so encode them once up front instead of
# rebuilding the same tensors on every epoch.
training_pairs = [
    (get_input_tensor(completion["x"]), get_target_index_tensor(completion["y"]))
    for completion in completions
]

epochs = 500
for epoch in range(epochs):
    # Sum of the per-example losses over this pass through the data.
    total_loss = 0
    for input_tensor, target_tensor in training_pairs:
        # One optimiser step per example (batch size 1).
        optimizer.zero_grad()
        output = network(input_tensor)

        loss = criterion(output, target_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Report progress every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 100, Loss: 0.0351
Epoch 200, Loss: 0.0045
Epoch 300, Loss: 0.0016
Epoch 400, Loss: 0.0007
Epoch 500, Loss: 0.0004


In [3]:
def predict(text):
    """Return the vocabulary word the network scores highest for ``text``.

    ``text`` is encoded with ``get_input_tensor``, passed through ``network``
    in evaluation mode with gradient tracking disabled, and the index of the
    largest softmax probability is mapped back to a word via
    ``index_to_word``.

    Note: this switches ``network`` to eval mode and leaves it there.
    """
    network.eval()
    encoded = get_input_tensor(text)
    with torch.no_grad():
        logits = network(encoded)
        class_probs = torch.softmax(logits, dim=1)
    # argmax without `dim` searches the flattened tensor; with a single-row
    # batch that position is the class index.
    best_index = class_probs.argmax().item()
    return index_to_word[best_index]

# Spot-check a handful of individual two-word contexts.
print("Predictions:")
for prompt in ("jumped over", "the quick", "fox jumped", "over the"):
    print(f"{prompt} -> {predict(prompt)}")


# Greedy generation: start from a two-word seed, then repeatedly predict the
# next word from the last two words and append it to the sentence.
print("\n============ full sentence prediction ============")
full_sentence = "the quick"
start = full_sentence

for i in range(7):
    print(i, start)
    prediction = predict(start)
    tokens = full_sentence.split()
    tokens.append(prediction)
    full_sentence = " ".join(tokens)
    # The next context is the trailing two words of the sentence so far.
    start = " ".join(tokens[-2:])

print(full_sentence)


Predictions:
jumped over -> the
the quick -> brown
fox jumped -> over
over the -> lazy

0 the quick
1 quick brown
2 brown fox
3 fox jumped
4 jumped over
5 over the
6 the lazy
the quick brown fox jumped over the lazy dog
